Simplify.hs 18.8 KB
Newer Older
1
2
3
{- |
    Module      :  $Header$
    Description :  Optimizing the Desugared Code
4
5
6
    Copyright   :  (c) 2003        Wolfgang Lux
                                   Martin Engelke
                       2011 - 2015 Björn Peemöller
7
8
9
10
11
12
13
14
15
16
17
18
19
20
    License     :  OtherLicense

    Maintainer  :  bjp@informatik.uni-kiel.de
    Stability   :  experimental
    Portability :  portable

   After desugaring the source code, but before lifting local
   declarations, the compiler performs a few simple optimizations to
   improve the efficiency of the generated code. In addition, the
   optimizer replaces pattern bindings with simple variable bindings and
   selector functions.

   Currently, the following optimizations are implemented:

21
     * Under certain conditions, inline local function definitions.
22
     * Remove unused declarations.
23
24
     * Compute minimal binding groups for let expressions.
     * Remove pattern bindings to constructor terms
25
26
     * Inline simple constants.
-}
27
{-# LANGUAGE CPP #-}
28
29
module Transformations.Simplify (simplify) where

30
31
32
#if __GLASGOW_HASKELL__ < 710
import           Control.Applicative        ((<$>), (<*>))
#endif
33
34
import           Control.Monad.State as S   (State, runState, gets, modify)
import qualified Data.Map            as Map (Map, empty, insert, lookup)
35
36
37
38
39
40
41
42
43
44
45
46
47
48

import Curry.Base.Position
import Curry.Base.Ident
import Curry.Syntax

import Base.Expr
import Base.Messages (internalError)
import Base.SCC
import Base.Types
import Base.Typing
import Base.Utils (concatMapM)

import Env.Value (ValueEnv, ValueInfo (..), bindFun, qualLookupValue)

49
50
51
52
53
54
55
56
57
58
59
60
-- -----------------------------------------------------------------------------
-- Simplification
-- -----------------------------------------------------------------------------

simplify :: ValueEnv -> Module -> (Module, ValueEnv)
simplify tyEnv mdl@(Module _ m _ _ _) = (mdl', valueEnv s')
  where (mdl', s') = S.runState (simModule mdl) (SimplifyState m tyEnv 1)

-- -----------------------------------------------------------------------------
-- Internal state monad
-- -----------------------------------------------------------------------------

61
62
data SimplifyState = SimplifyState
  { moduleIdent :: ModuleIdent -- read-only!
63
  , valueEnv    :: ValueEnv    -- updated for new pattern selection functions
64
65
66
67
  , nextId      :: Int         -- counter
  }

type SIM = S.State SimplifyState
68

69
70
71
72
73
74
75
76
77
getModuleIdent :: SIM ModuleIdent
getModuleIdent = S.gets moduleIdent

getNextId :: SIM Int
getNextId = do
  nid <- S.gets nextId
  S.modify $ \s -> s { nextId = succ nid }
  return nid

78
79
80
getTypeOf :: Typeable t => t -> SIM Type
getTypeOf t = do
  tyEnv <- getValueEnv
81
  return (typeOf tyEnv t)
82

83
84
85
86
87
getFunArity :: QualIdent -> SIM Int
getFunArity f = do
  m     <- getModuleIdent
  tyEnv <- getValueEnv
  return $ case qualLookupValue f tyEnv of
88
89
90
91
    [Value _ a _] -> a
    _             -> case qualLookupValue (qualQualify m f) tyEnv of
      [Value _ a _] -> a
      _             -> internalError $ "Simplify.funType " ++ show f
92

93
94
95
96
97
98
modifyValueEnv :: (ValueEnv -> ValueEnv) -> SIM ()
modifyValueEnv f = S.modify $ \ s -> s { valueEnv = f $ valueEnv s }

getValueEnv :: SIM ValueEnv
getValueEnv = S.gets valueEnv

99
100
freshIdent :: (Int -> Ident) -> Int -> TypeScheme -> SIM Ident
freshIdent f arity ty = do
101
102
  m <- getModuleIdent
  x <- f <$> getNextId
103
  modifyValueEnv $ bindFun m x arity ty
104
105
106
107
108
  return x

-- -----------------------------------------------------------------------------
-- Simplification
-- -----------------------------------------------------------------------------
109

110
111
112
simModule :: Module -> SIM Module
simModule (Module ps m es is ds) = Module ps m es is
                                   <$> mapM (simDecl Map.empty) ds
113

114
115
116
-- Inline an expression for a variable
type InlineEnv = Map.Map Ident Expression

117
simDecl :: InlineEnv -> Decl -> SIM Decl
118
119
120
simDecl env (FunctionDecl p f eqs) = FunctionDecl p f
                                     <$> concatMapM (simEquation env) eqs
simDecl env (PatternDecl  p t rhs) = PatternDecl  p t <$> simRhs env rhs
121
122
simDecl _   d                      = return d

123
124
125
126
127
128
129
130
131
132
133
134
135
simEquation :: InlineEnv -> Equation -> SIM [Equation]
simEquation env (Equation p lhs rhs) = do
  rhs'  <- simRhs env rhs
  inlineFun env p lhs rhs'

simRhs :: InlineEnv -> Rhs -> SIM Rhs
simRhs env (SimpleRhs p e _) = simpleRhs p <$> simExpr env e
simRhs _   (GuardedRhs  _ _) = error "Simplify.simRhs: guarded rhs"

-- -----------------------------------------------------------------------------
-- Inlining of Functions
-- -----------------------------------------------------------------------------

136
137
-- After simplifying the right hand side of an equation, the compiler
-- transforms declarations of the form
138
--
139
140
141
--   f t_1 ... t_{k-l} x_{k-l+1} ... x_k =
--     let g y_1 ... y_l = e
--     in  g x_{k-l+1} ... x_k
142
--
143
-- into the equivalent definition
144
--
145
146
147
148
--   f t_1 ... t_{k-l} x_{k-l+1} x_k = let y_1   = x_{k-l+1}
--                                              ...
--                                         y_l   = x_k
--                                     in  e
149
--
150
151
152
153
154
155
-- where the arities of 'f' and 'g' are 'k' and 'l', respectively, and
-- 'x_{k-l+1}, ... ,x_k' are variables. The transformation can obviously be
-- generalized to the case where 'g' is defined by more than one equation.
-- However, we must be careful not to change the evaluation mode of arguments.
-- Therefore, the transformation is applied only all of the arguments of 'g'
-- are variables.
156
--
157
158
159
160
161
-- This transformation is actually just a special case of inlining a
-- (local) function definition. We are unable to handle the general case
-- because it would require to represent the pattern matching code
-- explicitly in a Curry expression.

162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
inlineFun :: InlineEnv -> Position -> Lhs -> Rhs -> SIM [Equation]
inlineFun env p lhs rhs = do
  m <- getModuleIdent
  case rhs of
    SimpleRhs _ (Let [FunctionDecl _ f' eqs'] e) _
      | -- @f'@ is not recursive
        f' `notElem` qfv m eqs'
        -- @f'@ does not perform any pattern matching
        && and [all isVarPattern ts1 | Equation _ (FunLhs _ ts1) _ <- eqs']
      -> do
        a <- getFunArity (qualify f')
        let (n, vs', e') = etaReduce 0 [] (reverse (snd $ flatLhs lhs)) e
        if  -- the eta-reduced rhs of @f@ is a call to @f'@
            e' == Variable (qualify f')
            -- @f'@ was fully applied before eta-reduction
            && n  == a
          then mapM (mergeEqns p vs') eqs'
          else return [Equation p lhs rhs]
    _ -> return [Equation p lhs rhs]
181
182
183
  where
  etaReduce n1 vs (VariablePattern v : ts1) (Apply e1 (Variable v'))
    | qualify v == v' = etaReduce (n1 + 1) (v:vs) ts1 e1
184
  etaReduce n1 vs _ e1 = (n1, vs, e1)
185

186
187
188
189
190
  mergeEqns p1 vs (Equation _ (FunLhs _ ts2) (SimpleRhs p2 e _))
    = Equation p1 lhs <$> simRhs env (simpleRhs p2 (Let ds e))
      where
      ds = zipWith (\t v -> PatternDecl p2 t (simpleRhs p2 (mkVar v))) ts2 vs
  mergeEqns _ _ _ = error "Simplify.inlineFun.mergeEqns: no pattern match"
191

192
193
194
-- -----------------------------------------------------------------------------
-- Simplification of Expressions
-- -----------------------------------------------------------------------------
195
196

-- Variables that are bound to (simple) constants and aliases to other
197
198
199
200
201
202
203
204
205
-- variables are substituted. In terms of conventional compiler technology,
-- these optimizations correspond to constant propagation and copy propagation,
-- respectively. The transformation is applied recursively to a substituted
-- variable in order to handle chains of variable definitions.

-- Applications of let-expressions and case-expressions to other expressions
-- are simplified according to the following rules:
--   (let ds in e_1)            e_2 -> let ds in (e1 e2)
--   (case e_1 of p'_n -> e'_n) e_2 -> case e_1 of p'_n -> (e'n e_2)
206
207
208
209
210

-- The bindings of a let expression are sorted topologically in
-- order to split them into minimal binding groups. In addition,
-- local declarations occurring on the right hand side of a pattern
-- declaration are lifted into the enclosing binding group using the
211
212
-- equivalence (modulo alpha-conversion) of 'let x = let ds in e_1 in e_2'
-- and 'let ds; x = e_1 in e_2'.
213
214
215
216
217
-- This transformation avoids the creation of some redundant lifted
-- functions in later phases of the compiler.

simExpr :: InlineEnv -> Expression -> SIM Expression
simExpr _   l@(Literal     _) = return l
218
simExpr _   c@(Constructor _) = return c
219
-- subsitution of variables
220
221
simExpr env v@(Variable    x)
  | isQualified x = return v
222
  | otherwise     = maybe (return v) (simExpr env) (Map.lookup (unqualify x) env)
223
-- simplification of application
224
simExpr env (Apply     e1 e2) = case e1 of
225
226
  Let ds e'       -> simExpr env (Let ds (Apply e' e2))
  Case r ct e' bs -> simExpr env (Case r ct e' (map (applyToAlt e2) bs))
227
228
229
  _               -> Apply <$> simExpr env e1 <*> simExpr env e2
  where
  applyToAlt e (Alt       p t rhs) = Alt p t (applyToRhs e rhs)
230
  applyToRhs e (SimpleRhs p e1' _) = simpleRhs p (Apply e1' e)
231
  applyToRhs _ (GuardedRhs    _ _) = error "Simplify.simExpr.applyRhs: Guarded rhs"
232
-- simplification of declarations
233
simExpr env (Let        ds e) = do
234
235
236
  m   <- getModuleIdent
  dss <- mapM sharePatternRhs ds
  simplifyLet env (scc bv (qfv m) (foldr hoistDecls [] (concat dss))) e
237
238
239
240
simExpr env (Case  r ct e bs) = Case r ct     <$> simExpr env e
                                              <*> mapM (simplifyAlt env) bs
simExpr env (Typed      e ty) = flip Typed ty <$> simExpr env e
simExpr _   _                 = error "Simplify.simExpr: no pattern match"
241

242
-- Simplify a case alternative
243
simplifyAlt :: InlineEnv -> Alt -> SIM Alt
244
simplifyAlt env (Alt p t rhs) = Alt p t <$> simRhs env rhs
245

246
247
-- Transform a pattern declaration @t = e@ into two declarations
-- @t = v, v = e@ whenever @t@ is not a variable. This is used to share
248
-- the expression @e@ using the fresh variable @v@.
249
250
251
252
253
sharePatternRhs :: Decl -> SIM [Decl]
sharePatternRhs (PatternDecl p t rhs) = case t of
  VariablePattern _ -> return [PatternDecl p t rhs]
  _                 -> do
    ty <- monoType <$> getTypeOf t
254
    v  <- addRefId (srcRefOf p) <$> freshIdent patternId 0 ty
255
256
257
258
259
260
261
262
    return [ PatternDecl p t                   (simpleRhs p (mkVar v))
           , PatternDecl p (VariablePattern v) rhs
           ]
  where patternId n = mkIdent ("_#pat" ++ show n)
sharePatternRhs d                     = return [d]

-- Lift up nested let declarations in pattern declarations, i.e., replace
-- @let p = let ds' in e'; ds in e@ by @let ds'; p = e'; ds in e@.
263
hoistDecls :: Decl -> [Decl] -> [Decl]
264
265
hoistDecls (PatternDecl p t (SimpleRhs p' (Let ds' e) _)) ds
 = foldr hoistDecls ds (PatternDecl p t (simpleRhs p' e) : ds')
266
267
268
269
270
271
272
273
274
hoistDecls d ds = d : ds

-- The declaration groups of a let expression are first processed from
-- outside to inside, simplifying the right hand sides and collecting
-- inlineable expressions on the fly. At present, only simple constants
-- and aliases to other variables are inlined. A constant is considered
-- simple if it is either a literal, a constructor, or a non-nullary
-- function. Note that it is not possible to define nullary functions in
-- local declarations in Curry. Thus, an unqualified name always refers
275
-- to either a variable or a non-nullary function. Applications of
276
277
278
-- constructors and partial applications of functions to at least one
-- argument are not inlined because the compiler has to allocate space
-- for them, anyway. In order to prevent non-termination, recursive
279
-- binding groups are not processed for inlining.
280
281
282
283

-- With the list of inlineable expressions, the body of the let is
-- simplified and then the declaration groups are processed from inside
-- to outside to construct the simplified, nested let expression. In
284
-- doing so, unused bindings are discarded. In addition, all pattern
285
286
287
288
289
290
-- bindings are replaced by simple variable declarations using selector
-- functions to access the pattern variables.

simplifyLet :: InlineEnv -> [[Decl]] -> Expression -> SIM Expression
simplifyLet env []       e = simExpr env e
simplifyLet env (ds:dss) e = do
291
  m     <- getModuleIdent
292
293
  ds'   <- mapM (simDecl env) ds  -- simplify declarations
  env'  <- inlineVars env ds'     -- inline a simple variable binding
294
295
  e'    <- simplifyLet env' dss e -- simplify remaining bindings
  ds''  <- concatMapM (expandPatternBindings (qfv m ds' ++ qfv m e')) ds'
296
297
  return $ foldr (mkLet m) e' (scc bv (qfv m) ds'')

298
299
inlineVars :: InlineEnv -> [Decl] -> SIM InlineEnv
inlineVars env ds = case ds of
300
301
302
303
  [PatternDecl _ (VariablePattern v) (SimpleRhs _ e _)] -> do
    allowed <- canInlineVar v e
    return $ if allowed then Map.insert v e env else env
  _ -> return env
304
  where
305
306
307
308
309
  canInlineVar _ (Literal     _) = return True
  canInlineVar _ (Constructor _) = return True
  canInlineVar v (Variable   v')
    | isQualified v'             = (> 0) <$> getFunArity v'
    | otherwise                  = return $ v /= unqualify v'
310
  canInlineVar _ _               = return False
311
312
313
314

mkLet :: ModuleIdent -> [Decl] -> Expression -> Expression
mkLet m [FreeDecl p vs] e
  | null vs'  = e
315
  | otherwise = Let [FreeDecl p vs'] e         -- remove unused free variables
316
317
  where vs' = filter (`elem` qfv m e) vs
mkLet m [PatternDecl _ (VariablePattern v) (SimpleRhs _ e _)] (Variable v')
318
  | v' == qualify v && v `notElem` qfv m e = e -- inline single binding
319
mkLet m ds e
320
321
  | null (filter (`elem` qfv m e) (bv ds)) = e -- removed unused bindings
  | otherwise                              = Let ds e
322
323
324
325

-- In order to implement lazy pattern matching in local declarations,
-- pattern declarations 't = e' where 't' is not a variable
-- are transformed into a list of declarations
326
-- 'v_0 = e; v_1 = f_1 v_0; ...; v_n = f_n v_0' where 'v_0' is a fresh
327
328
329
330
331
332
333
334
-- variable, 'v_1,...,v_n' are the variables occurring in 't' and the
-- auxiliary functions 'f_i' are defined by 'f_i t = v_i' (see also
-- appendix D.8 of the Curry report). The bindings 'v_0 = e' are introduced
-- before splitting the declaration groups of the enclosing let expression
-- (cf. the 'Let' case in 'simExpr' above) so that they are placed in their own
-- declaration group whenever possible. In particular, this ensures that
-- the new binding is discarded when the expression 'e' is itself a variable.

335
336
337
338
339
340
-- fvs contains all variables used in the declarations and the body
-- of the let expression.
expandPatternBindings :: [Ident] -> Decl -> SIM [Decl]
expandPatternBindings fvs d@(PatternDecl p t (SimpleRhs _ e _)) = case t of
  VariablePattern _ -> return [d]
  _                 -> do
341
342
    pty <- getTypeOf t -- type of pattern
    mapM (mkSelectorDecl pty) (filter (`elem` fvs) (bv t)) -- used variables
343
344
345
 where
  mkSelectorDecl pty v = do
    vty <- getTypeOf v
346
    f   <- freshIdent (updIdentName (++ '#' : idName v) . fpSelectorId) 1
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
                      (polyType (TypeArrow pty vty))
    return $ varDecl p v $ Let [funDecl p f [t] (mkVar v)] (Apply (mkVar f) e)
expandPatternBindings _ d = return [d]

-- ---------------------------------------------------------------------------
-- Auxiliary functions
-- ---------------------------------------------------------------------------

isVarPattern :: Pattern -> Bool
isVarPattern (VariablePattern      _) = True
isVarPattern (AsPattern          _ t) = isVarPattern t
isVarPattern (ConstructorPattern _ _) = False
isVarPattern (LiteralPattern       _) = False
isVarPattern _ = error "Simplify.isVarPattern: no pattern match"

mkVar :: Ident -> Expression
mkVar = Variable . qualify

simpleRhs :: Position -> Expression -> Rhs
simpleRhs p e = SimpleRhs p e []

varDecl :: Position -> Ident -> Expression -> Decl
varDecl p v e = PatternDecl p (VariablePattern v) (simpleRhs p e)

funDecl :: Position -> Ident -> [Pattern] -> Expression -> Decl
funDecl p f ts e = FunctionDecl p f [Equation p (FunLhs f ts) (simpleRhs p e)]

-- ---------------------------------------------------------------------------
375
-- Additional (obsolete) information
376
377
378
379
-- ---------------------------------------------------------------------------

-- Unfortunately, the transformation of pattern declarations introduces a
-- well-known space leak (Wadler87:Leaks,Sparud93:Leaks) because the matched
380
381
382
383
384
385
386
387
388
389
390
391
392
393
-- expression cannot be garbage collected until all of the matched
-- variables have been evaluated. Consider the following function:
--
--   f x | all (' ' ==) cs = c where (c:cs) = x
--
-- One might expect the call 'f (replicate 10000 ' ')' to execute in
-- constant space because (the tail of) the long list of blanks is
-- consumed and discarded immediately by 'all'. However, the
-- application of the selector function that extracts the head of the
-- list is not evaluated until after the guard has succeeded and thus
-- prevents the list from being garbage collected.

-- In order to avoid this space leak we use the approach
-- from (Sparud93:Leaks) and update all pattern variables when one
394
-- of the selector functions has been evaluated. Therefore, all pattern
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
-- variables except for the matched one are passed as additional
-- arguments to each of the selector functions. Thus, each of these
-- variables occurs twice in the argument list of a selector function,
-- once in the first argument and also as one of the remaining arguments.
-- This duplication of names is used by the compiler to insert the code
-- that updates the variables when generating abstract machine code.

-- By its very nature, this transformation introduces cyclic variable
-- bindings. Since cyclic bindings are not supported by PAKCS, we revert
-- to a simpler translation when generating FlatCurry output.

-- We will add only those pattern variables as additional arguments which
-- are actually used in the code. This reduces the number of auxiliary
-- variables and can prevent the introduction of a recursive binding
-- group when only a single variable is used. It is also the reason for
-- performing this transformation here instead of in the 'Desugar' module.
-- The selector functions are defined in a local declaration on
-- the right hand side of a projection declaration so that there is
-- exactly one declaration for each used variable.

-- Another problem of the translation scheme is the handling of pattern
-- variables with higher-order types, e.g.,
--
--   strange :: [a->a] -> Maybe (a->a)
--   strange xs = Just x
--     where (x:_) = xs
--
-- By reusing the types of the pattern variables, the selector function
-- 'f (x:_) = x' has type '[a->a] -> a -> a' and therefore seems to be
-- a binary function. Thus, in the goal 'strange []' the
-- selector is only applied partially and not evaluated. Note that this
-- goal will fail without the type annotation. In order to ensure that a
-- selector function is always evaluated when the corresponding variable
-- is used, we assume that the projection declarations -- ignoring the
-- additional arguments to prevent the space leak -- are actually defined
-- by 'f_i t = I v_i', using a private renaming type
431
--
432
--   newtype Identity a = I a
433
--
434
435
436
437
-- As newtype constructors are completely transparent to the compiler,
-- this does not change the generated code, but only the types of the
-- selector functions.

438
439
440
-- identityType :: Type -> Type
-- identityType = TypeConstructor qIdentityId . return
--   where qIdentityId = qualify (mkIdent "Identity")