Commit 50d60cf5 authored by Björn Peemöller 's avatar Björn Peemöller
Browse files

Merge branch 'master' of /home/bjp/public_html/repos/curry-frontend

parents 62f8f62f d0526478
......@@ -53,17 +53,6 @@ Executable cymake
, Check.SyntaxCheck
, Check.TypeCheck
, Check.WarnCheck
, Curry.IL
, Curry.IL.Pretty
, Curry.IL.Type
, Curry.IL.XML
, Curry.Syntax
, Curry.Syntax.Lexer
, Curry.Syntax.Parser
, Curry.Syntax.Pretty
, Curry.Syntax.ShowModule
, Curry.Syntax.Type
, Curry.Syntax.Utils
, Env.CurryEnv
, Env.NestEnv
, Env.OldScopeEnv
......@@ -73,6 +62,10 @@ Executable cymake
, Gen.GenFlatCurry
, Html.CurryHtml
, Html.SyntaxColoring
, IL
, IL.Pretty
, IL.Type
, IL.XML
, CurryBuilder
, CompilerOpts
, CurryDeps
......
{- |A simple interface for reading and manipulating Curry source code.
(c) 2009, Holger Siegel
2011, Björn Peemöller
-}
module Curry.Syntax
( module Curry.Syntax.Type
, module Curry.Syntax.Utils
, lexFile
, parseHeader
, parseModule
, ppModule, ppIDecl
, showModule
) where
import Curry.Base.Position (Position, first)
import Curry.Base.MessageMonad (MsgMonad)
import Curry.Files.Unlit (unlit)
import qualified Curry.Syntax.Lexer as Lexer (Token, lexFile)
import qualified Curry.Syntax.Parser as Parser (parseHeader, parseSource)
import Curry.Syntax.Pretty (ppModule, ppIDecl)
import Curry.Syntax.ShowModule (showModule)
import Curry.Syntax.Type
import Curry.Syntax.Utils
{- |Return the result of a lexical analysis of the source program 'src'.
The result is a list of tuples consisting of a 'Position' and a 'Token'
-}
lexFile :: FilePath -> String -> MsgMonad [(Position, Lexer.Token)]
lexFile fn src = unlit fn src >>= \s -> Lexer.lexFile (first fn) s False []
-- | Parse a curry header
parseHeader :: FilePath -> String -> MsgMonad Module
parseHeader fn src = unlit fn src >>= Parser.parseHeader fn
-- | Parse a curry module
parseModule :: Bool -> FilePath -> String -> MsgMonad Module
parseModule likeFlat fn src = unlit fn src >>= Parser.parseSource likeFlat fn
% $Id: CurryLexer.lhs,v 1.40 2004/03/04 22:39:12 wlux Exp $
%
% Copyright (c) 1999-2004, Wolfgang Lux
% See LICENSE for the full license.
%
% Modified by Martin Engelke (men@informatik.uni-kiel.de)
% Modified by Bjoern Peemoeller (bjp@informatik.uni-kiel.de)
%
\nwfilename{CurryLexer.lhs}
\section{A Lexer for Curry}
In this section a lexer for Curry is implemented.
\begin{verbatim}
> module Curry.Syntax.Lexer
> ( -- * Data types
> Token (..), Category (..), Attributes (..)
> -- * lexing functions
> , lexFile, lexer
> ) where
> import Prelude hiding (fail)
> import Data.Char (chr, ord, isAlpha, isAlphaNum, isSpace, isUpper
> , isDigit, isOctDigit, isHexDigit)
> import Data.List (intercalate)
> import qualified Data.Map as Map (Map, union, lookup, fromList)
> import Curry.Base.LexComb
> import Curry.Base.LLParseComb (Symbol (..))
> import Curry.Base.Position
\end{verbatim}
\paragraph{Tokens} Note that the equality and ordering instances of
\texttt{Token} disregard the attributes.
\begin{verbatim}
> -- |Data type for curry lexer tokens
> data Token = Token Category Attributes
> instance Eq Token where
> Token c1 _ == Token c2 _ = c1 == c2
> instance Ord Token where
> Token c1 _ `compare` Token c2 _ = c1 `compare` c2
> instance Symbol Token where
> isEOF (Token c _) = c == EOF
> -- |Category of curry tokens
> data Category
> -- literals
> = CharTok
> | IntTok
> | FloatTok
> | IntegerTok
> | StringTok
> -- identifiers
> | Id -- identifier
> | QId -- qualified identifier
> | Sym -- symbol
> | QSym -- qualified symbol
> -- punctuation symbols
> | LeftParen -- (
> | RightParen -- )
> | Semicolon -- ;
> | LeftBrace -- {
> | RightBrace -- }
> | LeftBracket -- [
> | RightBracket -- ]
> | Comma -- ,
> | Underscore -- _
> | Backquote -- `
> -- layout (inserted by bbr)
> | LeftBraceSemicolon -- {; (turn off layout)
> | VSemicolon -- virtual ;
> | VRightBrace -- virtual }
> -- reserved keywords
> | KW_case
> -- | KW_class -- not supported yet
> | KW_choice -- deprecated
> | KW_data
> -- | KW_deriving -- not supported yet
> | KW_do
> | KW_else
> | KW_eval -- deprecated
> | KW_external
> | KW_free
> | KW_if
> | KW_import
> | KW_in
> | KW_infix
> | KW_infixl
> | KW_infixr
> -- | KW_instance -- not supported yet
> | KW_let
> | KW_module
> | KW_newtype
> | KW_of
> | KW_rigid -- deprecated
> | KW_then
> | KW_type
> | KW_where
> -- reserved operators
> | At -- @
> | Colon -- :
> | DotDot -- ..
> | DoubleColon -- ::
> | Equals -- =
> | Backslash -- \
> | Bar -- |
> | LeftArrow -- <-
> | RightArrow -- ->
> | Tilde -- ~
> | Binds -- :=
> -- | Context -- => -- not supported yet
> -- special identifiers
> | Id_as
> | Id_ccall
> | Id_forall
> | Id_hiding
> | Id_interface
> | Id_primitive
> | Id_qualified
> -- special operators
> | SymDot -- .
> | SymMinus -- -
> | SymMinusDot -- -.
> -- compiler pragma (bjp)
> | Pragma
> -- comments (only for full lexer) inserted by men & bbr
> | LineComment
> | NestedComment
> -- end-of-file token
> | EOF
> deriving (Eq,Ord)
\end{verbatim}
There are different kinds of attributes associated with the tokens.
Most attributes simply save the string corresponding to the token.
However, for qualified identifiers, we also record the list of module
qualifiers. The values corresponding to a literal token are properly
converted already. To simplify the creation and extraction of
attribute values, we make use of records.
\begin{verbatim}
> -- |Attributes associated to a token
> data Attributes
> = NoAttributes
> | CharAttributes { cval :: Char , original :: String}
> | IntAttributes { ival :: Int , original :: String}
> | FloatAttributes { fval :: Double , original :: String}
> | IntegerAttributes { intval :: Integer , original :: String}
> | StringAttributes { sval :: String , original :: String}
> | IdentAttributes { modul :: [String], sval :: String}
> instance Show Attributes where
> showsPrec _ NoAttributes = showChar '_'
> showsPrec _ (CharAttributes cv _) = shows cv
> showsPrec _ (IntAttributes iv _) = shows iv
> showsPrec _ (FloatAttributes fv _) = shows fv
> showsPrec _ (IntegerAttributes iv _) = shows iv
> showsPrec _ (StringAttributes sv _) = shows sv
> showsPrec _ (IdentAttributes mIdent ident) = showsEscaped
> $ intercalate "."
> $ mIdent ++ [ident]
\end{verbatim}
The following functions can be used to construct tokens with
specific attributes.
\begin{verbatim}
> -- |Construct a simple 'Token' without 'Attributes'
> tok :: Category -> Token
> tok t = Token t NoAttributes
> -- |Construct a 'Token' for identifiers
> idTok :: Category -> [String] -> String -> Token
> idTok t mIdent ident = Token t
> IdentAttributes { modul = mIdent, sval = ident }
> -- |Construct a 'Token' for a single 'Char'
> charTok :: Char -> String -> Token
> charTok c o = Token CharTok CharAttributes { cval = c, original = o }
> -- |Construct a 'Token' for an int value
> intTok :: Int -> String -> Token
> intTok base digits =
> Token IntTok IntAttributes { ival = convertIntegral base digits
> , original = digits }
> -- |Construct a 'Token' for a float value
> floatTok :: String -> String -> Int -> String -> Token
> floatTok mant frac expo rest =
> Token FloatTok FloatAttributes { fval = convertFloating mant frac expo
> , original = mant ++ "." ++ frac ++ rest }
> -- |Construct a 'Token' for an integer value
> integerTok :: Integer -> String -> Token
> integerTok base digits = Token IntegerTok
> IntegerAttributes { intval = (convertIntegral base digits) :: Integer
> , original = digits }
> -- |Construct a 'Token' for a string value
> stringTok :: String -> String -> Token
> stringTok cs o = Token StringTok
> StringAttributes { sval = cs, original = o }
> -- |Construct a 'Token' for a line comment
> lineCommentTok :: String -> Token
> lineCommentTok s = Token LineComment
> StringAttributes { sval = s, original = s }
> -- |Construct a 'Token' for a nested comment
> nestedCommentTok :: String -> Token
> nestedCommentTok s = Token NestedComment
> StringAttributes { sval = s, original = s }
> -- |Construct a 'Token' for a compiler pragma
> pragmaTok :: String -> Token
> pragmaTok s = Token Pragma StringAttributes { sval = s, original = s }
\end{verbatim}
The \texttt{Show} instance of \texttt{Token} is designed to display
all tokens in their source representation.
\begin{verbatim}
-- Helper for showing
> showsEscaped :: String -> ShowS
> showsEscaped s = showChar '`' . showString s . showChar '\''
> showsIdentifier :: Attributes -> ShowS
> showsIdentifier a = showString "identifier " . shows a
> showsSpecialIdentifier :: String -> ShowS
> showsSpecialIdentifier s = showString "identifier " . showsEscaped s
> showsOperator :: Attributes -> ShowS
> showsOperator a = showString "operator " . shows a
> showsSpecialOperator :: String -> ShowS
> showsSpecialOperator s = showString "operator " . showsEscaped s
> instance Show Token where
> showsPrec _ (Token Id a) = showsIdentifier a
> showsPrec _ (Token QId a) = showString "qualified "
> . showsIdentifier a
> showsPrec _ (Token Sym a) = showsOperator a
> showsPrec _ (Token QSym a) = showString "qualified "
> . showsOperator a
> showsPrec _ (Token IntTok a) = showString "integer " . shows a
> showsPrec _ (Token FloatTok a) = showString "float " . shows a
> showsPrec _ (Token CharTok a) = showString "character " . shows a
> showsPrec _ (Token IntegerTok a) = showString "integer " . shows a
> showsPrec _ (Token StringTok a) = showString "string " . shows a
> showsPrec _ (Token LeftParen _) = showsEscaped "("
> showsPrec _ (Token RightParen _) = showsEscaped ")"
> showsPrec _ (Token Semicolon _) = showsEscaped ";"
> showsPrec _ (Token LeftBrace _) = showsEscaped "{"
> showsPrec _ (Token RightBrace _) = showsEscaped "}"
> showsPrec _ (Token LeftBracket _) = showsEscaped "["
> showsPrec _ (Token RightBracket _) = showsEscaped "]"
> showsPrec _ (Token Comma _) = showsEscaped ","
> showsPrec _ (Token Underscore _) = showsEscaped "_"
> showsPrec _ (Token Backquote _) = showsEscaped "`"
> showsPrec _ (Token LeftBraceSemicolon _) = showsEscaped "{;"
> . showString " (turn off layout)"
> showsPrec _ (Token VSemicolon _) = showsEscaped ";"
> . showString " (inserted due to layout)"
> showsPrec _ (Token VRightBrace _) = showsEscaped "}"
> . showString " (inserted due to layout)"
> showsPrec _ (Token At _) = showsEscaped "@"
> showsPrec _ (Token Colon _) = showsEscaped ":"
> showsPrec _ (Token DotDot _) = showsEscaped ".."
> showsPrec _ (Token DoubleColon _) = showsEscaped "::"
> showsPrec _ (Token Equals _) = showsEscaped "="
> showsPrec _ (Token Backslash _) = showsEscaped "\\"
> showsPrec _ (Token Bar _) = showsEscaped "|"
> showsPrec _ (Token LeftArrow _) = showsEscaped "<-"
> showsPrec _ (Token RightArrow _) = showsEscaped "->"
> showsPrec _ (Token Tilde _) = showsEscaped "~"
> showsPrec _ (Token Binds _) = showsEscaped ":="
> showsPrec _ (Token SymDot _) = showsSpecialOperator "."
> showsPrec _ (Token SymMinus _) = showsSpecialOperator "-"
> showsPrec _ (Token SymMinusDot _) = showsSpecialOperator "-."
> showsPrec _ (Token KW_case _) = showsEscaped "case"
> showsPrec _ (Token KW_choice _) = showsEscaped "choice"
> showsPrec _ (Token KW_data _) = showsEscaped "data"
> showsPrec _ (Token KW_do _) = showsEscaped "do"
> showsPrec _ (Token KW_else _) = showsEscaped "else"
> showsPrec _ (Token KW_eval _) = showsEscaped "eval"
> showsPrec _ (Token KW_external _) = showsEscaped "external"
> showsPrec _ (Token KW_free _) = showsEscaped "free"
> showsPrec _ (Token KW_if _) = showsEscaped "if"
> showsPrec _ (Token KW_import _) = showsEscaped "import"
> showsPrec _ (Token KW_in _) = showsEscaped "in"
> showsPrec _ (Token KW_infix _) = showsEscaped "infix"
> showsPrec _ (Token KW_infixl _) = showsEscaped "infixl"
> showsPrec _ (Token KW_infixr _) = showsEscaped "infixr"
> showsPrec _ (Token KW_let _) = showsEscaped "let"
> showsPrec _ (Token KW_module _) = showsEscaped "module"
> showsPrec _ (Token KW_newtype _) = showsEscaped "newtype"
> showsPrec _ (Token KW_of _) = showsEscaped "of"
> showsPrec _ (Token KW_rigid _) = showsEscaped "rigid"
> showsPrec _ (Token KW_then _) = showsEscaped "then"
> showsPrec _ (Token KW_type _) = showsEscaped "type"
> showsPrec _ (Token KW_where _) = showsEscaped "where"
> showsPrec _ (Token Id_as _) = showsSpecialIdentifier "as"
> showsPrec _ (Token Id_ccall _) = showsSpecialIdentifier "ccall"
> showsPrec _ (Token Id_forall _) = showsSpecialIdentifier "forall"
> showsPrec _ (Token Id_hiding _) = showsSpecialIdentifier "hiding"
> showsPrec _ (Token Id_interface _) = showsSpecialIdentifier "interface"
> showsPrec _ (Token Id_primitive _) = showsSpecialIdentifier "primitive"
> showsPrec _ (Token Id_qualified _) = showsSpecialIdentifier "qualified"
> showsPrec _ (Token Pragma a) = shows a
> showsPrec _ (Token LineComment a) = shows a
> showsPrec _ (Token NestedComment a) = shows a
> showsPrec _ (Token EOF _) = showString "<end-of-file>"
\end{verbatim}
Maps for reserved operators and identifiers
\begin{verbatim}
> -- |Map of reserved operators
> reservedOps:: Map.Map String Category
> reservedOps = Map.fromList
> [ ("@" , At )
> , (":" , Colon )
> , ("::", DoubleColon)
> , ("..", DotDot )
> , ("=" , Equals )
> , ("\\", Backslash )
> , ("|" , Bar )
> , ("<-", LeftArrow )
> , ("->", RightArrow )
> , ("~" , Tilde )
> , (":=", Binds )
> ]
> -- |Map of reserved and special operators
> reservedSpecialOps :: Map.Map String Category
> reservedSpecialOps = Map.union reservedOps $ Map.fromList
> [ ("." , SymDot )
> , ("-" , SymMinus )
> , ("-.", SymMinusDot)
> ]
> -- |Map of keywords
> keywords :: Map.Map String Category
> keywords = Map.fromList
> [ ("case" , KW_case )
> , ("choice" , KW_choice )
> , ("data" , KW_data )
> , ("do" , KW_do )
> , ("else" , KW_else )
> , ("eval" , KW_eval )
> , ("external", KW_external)
> , ("free" , KW_free )
> , ("if" , KW_if )
> , ("import" , KW_import )
> , ("in" , KW_in )
> , ("infix" , KW_infix )
> , ("infixl" , KW_infixl )
> , ("infixr" , KW_infixr )
> , ("let" , KW_let )
> , ("module" , KW_module )
> , ("newtype" , KW_newtype )
> , ("of" , KW_of )
> , ("rigid" , KW_rigid )
> , ("then" , KW_then )
> , ("type" , KW_type )
> , ("where" , KW_where )
> ]
> -- |Map of reserved and special identifiers
> keywordsSpecialIds :: Map.Map String Category
> keywordsSpecialIds = Map.union keywords $ Map.fromList
> [ ("as" , Id_as )
> , ("ccall" , Id_ccall )
> , ("forall" , Id_forall )
> , ("hiding" , Id_hiding )
> , ("interface", Id_interface)
> , ("primitive", Id_primitive)
> , ("qualified", Id_qualified)
> ]
\end{verbatim}
Character classes
\begin{verbatim}
> isIdent :: Char -> Bool
> isIdent c = isAlphaNum c || c `elem` "'_"
> isSymbol :: Char -> Bool
> isSymbol c = c `elem` "~!@#$%^&*+-=<>:?./|\\"
\end{verbatim}
Lexing functions
\begin{verbatim}
> type SuccessP a = Position -> Token -> P a
> type FailP a = Position -> String -> P a
> lexFile :: P [(Position,Token)]
> lexFile = fullLexer tokens failP
> where tokens p t@(Token c _)
> | c == EOF = returnP [(p, t)]
> | otherwise = lexFile `thenP` returnP . ((p, t) :)
> lexer :: SuccessP a -> FailP a -> P a
> lexer success fail = skipBlanks
> where -- skipBlanks moves past whitespace and comments
> skipBlanks p [] bol = success p (tok EOF) p [] bol
> skipBlanks p ('\t':s) bol = skipBlanks (tab p) s bol
> skipBlanks p ('\n':s) _bol = skipBlanks (nl p) s True
> skipBlanks p ('-':'-':s) _bol = skipBlanks (nl p) (tail' (dropWhile (/= '\n') s)) True
> skipBlanks p ('{':'-':'#':s) bol = lexPragma id p success fail (incr p 3) s bol
> skipBlanks p ('{':'-':s) bol =
> skipNestedComment p skipBlanks fail (incr p 2) s bol
> skipBlanks p (c:s) bol
> | isSpace c = skipBlanks (next p) s bol
> | otherwise =
> (if bol then lexBOL else lexToken) success fail p (c:s) bol
> tail' [] = []
> tail' (_:tl) = tl
> fullLexer :: SuccessP a -> FailP a -> P a
> fullLexer success fail = skipBlanks
> where -- skipBlanks moves past whitespace
> skipBlanks p [] bol = success p (tok EOF) p [] bol
> skipBlanks p ('\t':s) bol = skipBlanks (tab p) s bol
> skipBlanks p ('\n':s) _bol = skipBlanks (nl p) s True
> skipBlanks p s@('-':'-':_) bol = lexLineComment success p s bol
> skipBlanks p s@('{':'-':'#':_) bol = lexPragma id p success fail p s bol
> skipBlanks p s@('{':'-':_) bol = lexNestedComment 0 id p success fail p s bol
> skipBlanks p (c:s) bol
> | isSpace c = skipBlanks (next p) s bol
> | otherwise =
> (if bol then lexBOL else lexToken) success fail p (c:s) bol
> lexLineComment :: SuccessP a -> P a
> lexLineComment success p s = case break (=='\n') s of
> (comment,rest) -> success p (lineCommentTok comment) (incr p (length comment)) rest
> lexPragma :: (String -> String) -> Position -> SuccessP a -> FailP a -> P a
> lexPragma prag p0 success _ p ('#':'-':'}':s)
> = success p0 (pragmaTok (prag "#-}")) (incr p 3) s
> lexPragma prag p0 success fail p (c@'\t':s)
> = lexPragma (prag . (c:)) p0 success fail (tab p) s
> lexPragma prag p0 success fail p (c@'\n':s)
> = lexPragma (prag . (c:)) p0 success fail (nl p) s
> lexPragma prag p0 success fail p (c:s)
> = lexPragma (prag . (c:)) p0 success fail (next p) s
> lexPragma _ p0 _ fail p ""
> = fail p0 "Unterminated pragma" p []
> lexNestedComment :: Int -> (String -> String) ->
> Position -> SuccessP a -> FailP a -> P a
> lexNestedComment 1 comment p0 success _ p ('-':'}':s) =
> success p0 (nestedCommentTok (comment "-}") ) (incr p 2) s
> lexNestedComment n comment p0 success fail p ('{':'-':s) =
> lexNestedComment (n+1) (comment . ("{-"++)) p0 success fail (incr p 2) s
> lexNestedComment n comment p0 success fail p ('-':'}':s) =
> lexNestedComment (n-1) (comment . ("-}"++)) p0 success fail (incr p 2) s
> lexNestedComment n comment p0 success fail p (c@'\t':s) =
> lexNestedComment n (comment . (c:)) p0 success fail (tab p) s
> lexNestedComment n comment p0 success fail p (c@'\n':s) =
> lexNestedComment n (comment . (c:)) p0 success fail (nl p) s
> lexNestedComment n comment p0 success fail p (c:s) =
> lexNestedComment n (comment . (c:)) p0 success fail (next p) s
> lexNestedComment _ _ p0 _ fail p "" =
> fail p0 "Unterminated nested comment" p []
> skipNestedComment :: Position -> P a -> FailP a -> P a
> skipNestedComment _ success _ p ('-':'}':s) = success (incr p 2) s
> skipNestedComment p0 success fail p ('{':'-':s) =
> skipNestedComment p (skipNestedComment p0 success fail) fail (incr p 2) s
> skipNestedComment p0 success fail p ('\t':s) =
> skipNestedComment p0 success fail (tab p) s
> skipNestedComment p0 success fail p ('\n':s) =
> skipNestedComment p0 success fail (nl p) s
> skipNestedComment p0 success fail p (_:s) =
> skipNestedComment p0 success fail (next p) s
> skipNestedComment p0 _ fail p [] =
> fail p0 "Unterminated nested comment at end-of-file" p []
> lexBOL :: SuccessP a -> FailP a -> P a
> lexBOL success fail p s _ [] = lexToken success fail p s False []
> lexBOL success fail p s _ ctxt@(n:rest)
> | col < n = success p (tok VRightBrace) p s True rest
> | col == n = success p (tok VSemicolon) p s False ctxt
> | otherwise = lexToken success fail p s False ctxt
> where col = column p
> lexToken :: SuccessP a -> FailP a -> P a
> lexToken success _ p [] = success p (tok EOF) p []
> lexToken success fail p (c:s)
> | c == '(' = token LeftParen
> | c == ')' = token RightParen
> | c == ',' = token Comma
> | c == ';' = token Semicolon
> | c == '[' = token LeftBracket
> | c == ']' = token RightBracket
> | c == '_' = token Underscore
> | c == '`' = token Backquote
> | c == '{' = lexLeftBrace (success p) (next p) s
> | c == '}' = \bol -> token RightBrace bol . drop 1
> | c == '\'' = lexChar p success fail (next p) s
> | c == '\"' = lexString p success fail (next p) s
> | isAlpha c = lexIdent (success p) p (c:s)
> | isSymbol c = lexSymbol (success p) p (c:s)
> | isDigit c = lexNumber (success p) p (c:s)
> | otherwise = fail p ("Illegal character "