Typing.hs 13.5 KB
Newer Older
1
2
3
4
{- |
    Module      :  $Header$
    Description :  Type computation of Curry expressions
    Copyright   :  (c) 2003 - 2006 Wolfgang Lux
5
                       2014 - 2015 Jan Tikovsky
6
7
8
9
10
11
12
13
    License     :  OtherLicense

    Maintainer  :  bjp@informatik.uni-kiel.de
    Stability   :  experimental
    Portability :  portable

-}

14
module Base.Typing (Typeable (..)) where
15
16

import Control.Monad
17
import qualified Control.Monad.State as S (State, evalState, gets, modify)
18
19
20
21
22
23
24
25
26
27

import Curry.Base.Ident
import Curry.Syntax

import Base.Messages (internalError)
import Base.TopEnv
import Base.Types
import Base.TypeSubst
import Base.Utils (foldr2)

28
import Env.Value ( ValueEnv, ValueInfo (..), lookupValue, qualLookupValue)
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

-- During the transformation of Curry source code into the intermediate
-- language, the compiler has to recompute the types of expressions. This
-- is simpler than type checking because the types of all variables are
-- known. Yet, the compiler still must handle functions and constructors
-- with polymorphic types and instantiate their type schemes using fresh
-- type variables. Since all types computed by 'typeOf' are monomorphic,
-- we can use type variables with non-negative offsets for the instantiation
-- of type schemes here without risk of name conflicts.
-- Using non-negative offsets also makes it easy to distinguish these
-- fresh variables from free type variables introduced during type
-- inference, which must be regarded as constants here.

-- However, using non-negative offsets for fresh type variables gives
-- rise to two problems when those types are entered back into the type
-- environment, e.g., while introducing auxiliary variables during
-- desugaring. The first is that those type variables now appear to be
-- universally quantified variables, but with indices greater than the
-- number of quantified type variables (To be precise, this can
-- happen only for auxiliary variables, which have monomorphic types,
-- whereas auxiliary functions will be assigned polymorphic types and
-- these type variables will be properly quantified. However, in this
-- case the assigned types may be too general.).
-- This results in an internal error (Prelude.!!: index too large)
-- whenever such a type is instantiated.
-- The second problem is that there may be inadvertent name captures
-- because computeType always uses indices starting at 0 for the fresh
-- type variables. In order to avoid these problems, 'computeType' renames
-- all type variables with non-negative offsets after the final type has
-- been computed, using negative indices below the one with the smallest
-- value occurring in the type environment. Computing the minimum index
-- of all type variables in the type environment seems prohibitively
-- inefficient. However, recall that, thanks to laziness, the minimum is
-- computed only when the final type contains any type variables with
-- non-negative indices. This happens, for instance, 36 times while
-- compiling the prelude (for 159 evaluated applications of 'typeOf') and
-- only twice while compiling the standard IO module (for 21 applications
-- of 'typeOf') (These numbers were obtained for version 0.9.9.).

-- A careful reader will note that inadvertent name captures are still
-- possible if one computes the types of two or more auxiliary variables
-- before actually entering their types into the environment. Therefore,
-- make sure that you enter the types of these auxiliary variables
-- immediately into the type environment, unless you are sure that those
-- types cannot contain fresh type variables. One such case are the free
-- variables of a goal.

-- TODO: In the long run, this module should be made obsolete by adding
-- attributes to the abstract syntax tree -- e.g., along the lines of
-- Chap.~6 in~\cite{PeytonJonesLester92:Book} -- and returning an
-- abstract syntax tree attributed with type information together with
-- the type environment from type inference. This also would allow
-- getting rid of the identifiers in the representation of integer
-- literals, which are used in order to implement overloading of
-- integer constants.

-- TODO: When computing the type of an expression with a type signature
-- make use of the annotation instead of recomputing its type. In order
-- to do this, we must either ensure that the types are properly
-- qualified and expanded or we need access to the type constructor
-- environment.

91
92
93
94
95
data TcState = TcState
  { valueEnv  :: ValueEnv
  , typeSubst :: TypeSubst
  , nextId    :: Int
  }
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
type TCM = S.State TcState

getValueEnv :: TCM ValueEnv
getValueEnv = S.gets valueEnv

getTypeSubst :: TCM TypeSubst
getTypeSubst = S.gets typeSubst

modifyTypeSubst :: (TypeSubst -> TypeSubst) -> TCM ()
modifyTypeSubst f = S.modify $ \s -> s { typeSubst = f $ typeSubst s }

getNextId :: TCM Int
getNextId = do
  nid <- S.gets nextId
  S.modify $ \ s -> s { nextId = succ nid }
  return nid

114
115
116
run :: TCM a -> ValueEnv -> a
run m tyEnv = S.evalState m initState
  where initState = TcState tyEnv idSubst 0
117
118

class Typeable a where
119
  typeOf :: ValueEnv -> a -> Type
120
121
122
123
124
125
126
127
128
129
130
131
132

instance Typeable Ident where
  typeOf = computeType identType

instance Typeable Pattern where
  typeOf = computeType argType

instance Typeable Expression where
  typeOf = computeType exprType

instance Typeable Rhs where
  typeOf = computeType rhsType

133
134
computeType :: (a -> TCM Type) -> ValueEnv -> a -> Type
computeType f tyEnv x = normalize (run doComputeType tyEnv)
135
136
137
138
139
  where
    doComputeType = do
      ty    <- f x
      theta <- getTypeSubst
      return (fixTypeVars tyEnv (subst theta ty))
140
141
142
143
144
145
146
147

fixTypeVars :: ValueEnv -> Type -> Type
fixTypeVars tyEnv ty = subst (foldr2 bindSubst idSubst tvs tvs') ty
  where tvs = filter (>= 0) (typeVars ty)
        tvs' = map TypeVariable [n - 1,n - 2 ..]
        n = minimum (0 : concatMap typeVars tys)
        tys = [ty1 | (_,Value _ _ (ForAll _ ty1)) <- localBindings tyEnv]

148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
identType :: Ident -> TCM Type
identType x = do
  tyEnv <- getValueEnv
  instUniv (varType x tyEnv)

litType :: Literal -> TCM Type
litType (Char _ _)   = return charType
litType (Int v _)    = identType v
litType (Float _ _)  = return floatType
litType (String _ _) = return stringType

argType :: Pattern -> TCM Type
argType (LiteralPattern l) = litType l
argType (NegativePattern _ l) = litType l
argType (VariablePattern v) = identType v
argType (ConstructorPattern c ts) = do
  tyEnv <- getValueEnv
  ty    <- instUnivExist (constrType c tyEnv)
  tys   <- mapM argType ts
  unifyList (init (flatten ty)) tys
  return (last (flatten ty))
169
170
  where flatten (TypeArrow ty1 ty2) = ty1 : flatten ty2
        flatten ty = [ty]
171
172
173
argType (InfixPattern t1 op t2) =
  argType (ConstructorPattern op [t1,t2])
argType (ParenPattern t) = argType t
174
175
176
177
178
argType (RecordPattern c fs) = do
  tyEnv <- getValueEnv
  ty    <- liftM arrowBase $ instUnivExist $ constrType c tyEnv
  mapM_ (fieldType argType ty) fs
  return ty
179
argType (TuplePattern _ ts)
180
  | null ts = return unitType
181
182
  | otherwise = liftM tupleType $ mapM argType ts
argType (ListPattern _ ts) = freshTypeVar >>= flip elemType ts
183
184
  where elemType ty [] = return (listType ty)
        elemType ty (t:ts1) =
185
186
187
188
189
190
191
192
193
          argType t >>= unify ty >> elemType ty ts1
argType (AsPattern v _) = argType (VariablePattern v)
argType (LazyPattern _ t) = argType t
argType (FunctionPattern f ts) = do
  tyEnv <- getValueEnv
  ty    <- instUniv (funType f tyEnv)
  tys   <- mapM argType ts
  unifyList (init (flatten ty)) tys
  return (last (flatten ty))
194
195
  where flatten (TypeArrow ty1 ty2) = ty1 : flatten ty2
        flatten ty = [ty]
196
197
198
199
200
201
202
203
204
205
206
207
argType (InfixFuncPattern t1 op t2) = argType (FunctionPattern op [t1,t2])

exprType :: Expression -> TCM Type
exprType (Literal l) = litType l
exprType (Variable v) = do
  tyEnv <- getValueEnv
  instUniv (funType v tyEnv)
exprType (Constructor c) = do
  tyEnv <- getValueEnv
  instUnivExist (constrType c tyEnv)
exprType (Typed e _) = exprType e
exprType (Paren e) = exprType e
208
209
210
211
212
213
214
215
216
exprType (Record c fs) = do
  tyEnv <- getValueEnv
  ty    <- liftM arrowBase $ instUnivExist $ constrType c tyEnv
  mapM_ (fieldType exprType ty) fs
  return ty
exprType (RecordUpdate e fs) = do
  ty <- exprType e
  mapM_ (fieldType exprType ty) fs
  return ty
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
exprType (Tuple _ es)
  | null es   = return unitType
  | otherwise = liftM tupleType $ mapM exprType es
exprType (List _ es) = freshTypeVar >>= flip elemType es
  where elemType ty []      = return (listType ty)
        elemType ty (e:es1) = exprType e >>= unify ty >> elemType ty es1
exprType (ListCompr _ e _) = liftM listType $ exprType e
exprType (EnumFrom _) = return (listType intType)
exprType (EnumFromThen _ _) = return (listType intType)
exprType (EnumFromTo _ _) = return (listType intType)
exprType (EnumFromThenTo _ _ _) = return (listType intType)
exprType (UnaryMinus _ e) = exprType e
exprType (Apply e1 e2) = do
  (ty1,ty2) <- exprType e1 >>= unifyArrow
  exprType e2 >>= unify ty1
  return ty2
exprType (InfixApply e1 op e2) = do
  (ty1,ty2,ty3) <- exprType (infixOp op) >>= unifyArrow2
  exprType e1 >>= unify ty1
  exprType e2 >>= unify ty2
  return ty3
exprType (LeftSection e op) = do
  (ty1,ty2,ty3) <- exprType (infixOp op) >>= unifyArrow2
  exprType e >>= unify ty1
  return (TypeArrow ty2 ty3)
exprType (RightSection op e) = do
  (ty1,ty2,ty3) <- exprType (infixOp op) >>= unifyArrow2
  exprType e >>= unify ty2
  return (TypeArrow ty1 ty3)
exprType (Lambda _ args e) = do
  tys <- mapM argType args
  ty  <- exprType e
  return (foldr TypeArrow ty tys)
exprType (Let _ e) = exprType e
exprType (Do _ e) = exprType e
exprType (IfThenElse _ e1 e2 e3) = do
  exprType e1 >>= unify boolType
  ty2 <- exprType e2
  ty3 <- exprType e3
  unify ty2 ty3
  return ty3
exprType (Case _ _ _ alts) = freshTypeVar >>= flip altType alts
259
260
  where altType ty [] = return ty
        altType ty (Alt _ _ rhs:alts1) =
261
262
263
264
265
          rhsType rhs >>= unify ty >> altType ty alts1

rhsType :: Rhs -> TCM Type
rhsType (SimpleRhs _ e _) = exprType e
rhsType (GuardedRhs es _) = freshTypeVar >>= flip condExprType es
266
267
  where condExprType ty [] = return ty
        condExprType ty (CondExpr _ _ e:es1) =
268
          exprType e >>= unify ty >> condExprType ty es1
269

270
271
fieldType :: (a -> TCM Type) -> Type -> Field a -> TCM Type
fieldType tcheck ty (Field _ l x) = do
272
  tyEnv <- getValueEnv
273
  TypeArrow ty1 ty2 <- instUniv (labelType l tyEnv)
274
275
276
277
  unify ty ty1
  lty <- tcheck x
  unify ty2 lty
  return lty
278
279
280
281
282

-- In order to avoid name conflicts with non-generalized type variables
-- in a type we instantiate quantified type variables using non-negative
-- offsets here.

283
284
freshTypeVar :: TCM Type
freshTypeVar = TypeVariable `liftM` getNextId
285

286
instType :: Int -> Type -> TCM Type
287
instType n ty = do
288
289
  tys <- replicateM n freshTypeVar
  return (expandAliasType tys ty)
290

291
instUniv :: TypeScheme -> TCM Type
292
293
instUniv (ForAll n ty) = instType n ty

294
instUnivExist :: ExistTypeScheme -> TCM Type
295
296
297
298
299
300
301
instUnivExist (ForAllExist n n' ty) = instType (n + n') ty

-- When unifying two types, the non-generalized variables, i.e.,
-- variables with negative offsets, must not be substituted. Otherwise,
-- the unification algorithm is identical to the one used by the type
-- checker.

302
303
304
305
306
307
unify :: Type -> Type -> TCM ()
unify ty1 ty2 = do
  theta <- getTypeSubst
  let ty1' = subst theta ty1
      ty2' = subst theta ty2
  modifyTypeSubst $ unifyTypes ty1' ty2'
308

309
310
unifyList :: [Type] -> [Type] -> TCM ()
unifyList tys1 tys2 = zipWithM_ unify tys1 tys2
311

312
unifyArrow :: Type -> TCM (Type,Type)
313
unifyArrow ty = do
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  theta <- getTypeSubst
  case subst theta ty of
    TypeVariable tv
      | tv >= 0 -> do
        ty1 <- freshTypeVar
        ty2 <- freshTypeVar
        modifyTypeSubst (bindVar tv (TypeArrow ty1 ty2))
        return (ty1,ty2)
    TypeArrow ty1 ty2 -> return (ty1,ty2)
    ty' -> internalError ("Base.Typing.unifyArrow (" ++ show ty' ++ ")")

unifyArrow2 :: Type -> TCM (Type,Type,Type)
unifyArrow2 ty = do
  (ty1,ty2)   <- unifyArrow ty
  (ty21,ty22) <- unifyArrow ty2
  return (ty1,ty21,ty22)
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355

unifyTypes :: Type -> Type -> TypeSubst -> TypeSubst
unifyTypes (TypeVariable tv1) (TypeVariable tv2) theta
  | tv1 == tv2 = theta
unifyTypes (TypeVariable tv) ty theta
  | tv >= 0 = bindVar tv ty theta
unifyTypes ty (TypeVariable tv) theta
  | tv >= 0 = bindVar tv ty theta
unifyTypes (TypeConstructor tc1 tys1) (TypeConstructor tc2 tys2) theta
  | tc1 == tc2 = foldr2 unifyTypes theta tys1 tys2
unifyTypes (TypeConstrained _ tv1) (TypeConstrained _ tv2) theta
  | tv1 == tv2 = theta
unifyTypes (TypeArrow ty11 ty12) (TypeArrow ty21 ty22) theta =
  unifyTypes ty11 ty21 (unifyTypes ty12 ty22 theta)
unifyTypes (TypeSkolem k1) (TypeSkolem k2) theta
  | k1 == k2 = theta
unifyTypes ty1 ty2 _ = internalError $
  "Base.Typing.unify: (" ++ show ty1 ++ ") (" ++ show ty2 ++ ")"

-- The functions 'constrType', 'varType', and 'funType' are used for computing
-- the type of constructors, pattern variables, and variables.

-- TODO: These functions should be shared with the type checker.

constrType :: QualIdent -> ValueEnv -> ExistTypeScheme
constrType c tyEnv = case qualLookupValue c tyEnv of
356
357
  [DataConstructor  _ _ _ sigma] -> sigma
  [NewtypeConstructor _ _ sigma] -> sigma
358
359
360
361
362
  _ -> internalError $ "Base.Typing.constrType: " ++ show c

varType :: Ident -> ValueEnv -> TypeScheme
varType v tyEnv = case lookupValue v tyEnv of
  [Value _ _ sigma] -> sigma
Jan Rasmus Tikovsky 's avatar
Jan Rasmus Tikovsky committed
363
  [Label _ _ sigma] -> sigma
364
365
366
367
368
  _ -> internalError $ "Base.Typing.varType: " ++ show v

funType :: QualIdent -> ValueEnv -> TypeScheme
funType f tyEnv = case qualLookupValue f tyEnv of
  [Value _ _ sigma] -> sigma
Jan Rasmus Tikovsky 's avatar
Jan Rasmus Tikovsky committed
369
  [Label _ _ sigma] -> sigma
370
371
  _ -> internalError $ "Base.Typing.funType: " ++ show f

372
373
labelType :: QualIdent -> ValueEnv -> TypeScheme
labelType l tyEnv = case qualLookupValue l tyEnv of
374
375
  [Label _ _ sigma] -> sigma
  _ -> internalError $ "Base.Typing.labelType: " ++ show l