Commit f397a0ec authored by Björn Peemöller 's avatar Björn Peemöller
Browse files

Refactoring of token stream generation

parent 669414b7
......@@ -97,7 +97,7 @@ Library
, Interfaces
, Modules
, ModuleSummary
, Token.TokenStream
, TokenStream
, Transformations
, Transformations.CaseCompletion
, Transformations.CurryToIL
......
......@@ -354,7 +354,7 @@ options =
, Option "" ["html"]
(NoArg (onOpts $ \ opts -> opts { optMode = ModeHtml }))
"generate html code and exit"
, Option "" ["token"]
, Option "" ["tokens"]
(NoArg (onOpts $ \ opts -> opts { optMode = ModeToken }))
"generate token stream and exit"
, Option "" ["parse-only"]
......
{- |
Module : $Header$
Description : Generating List of Tokens and Positions
This module defines a function for writing the list of tokens
and positions of a Curry source module into a separate file.
-}
module Token.TokenStream (source2token) where
import Control.Monad.Writer (liftIO)
import Data.List (intercalate)
import System.FilePath (replaceExtension)
import Curry.Base.Monad (CYIO, liftCYM, failMessages, runCYMIgnWarn)
import Curry.Base.Position (Position (..))
import Curry.Base.Pretty (text)
import Curry.Files.Filenames (addCurrySubdirModule)
import Curry.Files.PathUtils (readModule)
import Curry.Syntax -- import data constructors for all tokens
import Base.Messages (abortWithMessages, message, Message)
import CompilerOpts (Options (..))
import CurryBuilder (findCurry)
-- |Write list of tokens and positions into a file
-- TODO: To get the name of the module its header is getting parsed.
-- This should be improved because there shouldn't be done any
-- parsing when extracting only the TokenStream.
source2token :: Options -> String -> CYIO()
source2token opts s = do
srcFile <- findCurry opts s
parse <- (liftCYM $ parseHeader srcFile s)
(Module _ mid _ _ _) <- return $ patchModuleId srcFile parse
eitherErrsToks <- formatToken srcFile
outFile <- return $ replaceExtension (addCurrySubdirModule (optUseSubdir opts) mid srcFile) ".token"
case eitherErrsToks of
Left errs -> liftIO $ abortWithMessages errs
Right toks -> do let content = showTokenStream toks -- $ map (\(p, t) -> (p, showToken t)) toks
liftIO $ writeFile outFile content
showTokenStream :: [(Position, Token)] -> String
showTokenStream [] = "[]"
showTokenStream ts = "[ " ++ intercalate "\n, " (map showPosToken ts) ++ "\n]\n"
where showPosToken (p, t) = "(" ++ showPosition p ++
", " ++ showToken t ++ ")"
-- show position as "(line, column)"
showPosition :: Position -> String
showPosition p = "(" ++ (show $ line p) ++ ", " ++ (show $ column p) ++ ")"
-- |Create list of tokens and their positions from curry source file
formatToken :: String -> CYIO (Either [Message] [(Position, Token)])
formatToken f = do
mbModule <- liftIO $ readModule f
case mbModule of
Nothing -> failMessages [message $ text $ "Missing file: " ++ f]
Just src -> return $ runCYMIgnWarn (lexSource f src)
-- |Show tokens and their value if needed
showToken :: Token -> String
showToken t =
case t of
-- literals
Token CharTok (CharAttributes cvalue _) -> "CharTok " ++ show cvalue
Token IntTok (IntAttributes ivalue _) -> "IntTok " ++ show ivalue
Token FloatTok (FloatAttributes fvalue _) -> "FloatTok " ++ show fvalue
Token StringTok (StringAttributes svalue _) -> "StringTok " ++ show svalue
-- identifiers
Token Id (IdentAttributes _ svalue) -> "Id " ++ show svalue
Token QId (IdentAttributes _ svalue) -> "QId " ++ show svalue
Token Sym (IdentAttributes _ svalue) -> "Sym " ++ show svalue
Token QSym (IdentAttributes _ svalue) -> "QSym " ++ show svalue
-- punctuation symbols
Token LeftParen NoAttributes -> "LeftParen"
Token RightParen NoAttributes -> "RightParen"
Token Semicolon NoAttributes -> "Semicolon"
Token LeftBrace NoAttributes -> "LeftBrace"
Token RightBrace NoAttributes -> "RightBrace"
Token LeftBracket NoAttributes -> "LeftBracket"
Token RightBracket NoAttributes -> "RightBracket"
Token Comma NoAttributes -> "Comma"
Token Underscore NoAttributes -> "Underscore"
Token Backquote NoAttributes -> "Backquote"
-- layout
Token VSemicolon NoAttributes -> "VSemicolon"
Token VRightBrace NoAttributes -> "VRightBrace"
-- reserved keywords
Token KW_case (IdentAttributes _ _) -> "KW_case"
Token KW_data (IdentAttributes _ _) -> "KW_data"
Token KW_do (IdentAttributes _ _) -> "KW_do"
Token KW_else (IdentAttributes _ _) -> "KW_else"
Token KW_external (IdentAttributes _ _) -> "KW_external"
Token KW_fcase (IdentAttributes _ _) -> "KW_fcase"
Token KW_foreign (IdentAttributes _ _) -> "KW_foreign"
Token KW_free (IdentAttributes _ _) -> "KW_free"
Token KW_if (IdentAttributes _ _) -> "KW_if"
Token KW_import (IdentAttributes _ _) -> "KW_import"
Token KW_in (IdentAttributes _ _) -> "KW_in"
Token KW_infix (IdentAttributes _ _) -> "KW_infix"
Token KW_infixl (IdentAttributes _ _) -> "KW_infixl"
Token KW_infixr (IdentAttributes _ _) -> "KW_infixr"
Token KW_let (IdentAttributes _ _) -> "KW_let"
Token KW_module (IdentAttributes _ _) -> "KW_module"
Token KW_newtype (IdentAttributes _ _) -> "KW_newtype"
Token KW_of (IdentAttributes _ _) -> "KW_of"
Token KW_then (IdentAttributes _ _) -> "KW_then"
Token KW_type (IdentAttributes _ _) -> "KW_type"
Token KW_where (IdentAttributes _ _) -> "KW_where"
-- reserved operators
Token At (IdentAttributes _ _) -> "At"
Token Colon (IdentAttributes _ _) -> "Colon"
Token DotDot (IdentAttributes _ _) -> "DotDot"
Token DoubleColon (IdentAttributes _ _) -> "DoubleColon"
Token Equals (IdentAttributes _ _) -> "Equals"
Token Backslash (IdentAttributes _ _) -> "Backslash"
Token Bar (IdentAttributes _ _) -> "Bar"
Token LeftArrow (IdentAttributes _ _) -> "LeftArrow"
Token RightArrow (IdentAttributes _ _) -> "RightArrow"
Token Tilde (IdentAttributes _ _) -> "Tilde"
Token Bind (IdentAttributes _ _) -> "Bind"
Token Select (IdentAttributes _ _) -> "Select"
-- special identifiers
Token Id_as (IdentAttributes _ _) -> "Id_as"
Token Id_ccall (IdentAttributes _ _) -> "Id_ccall"
Token Id_forall (IdentAttributes _ _) -> "Id_forall"
Token Id_hiding (IdentAttributes _ _) -> "Id_hiding"
Token Id_interface (IdentAttributes _ _) -> "Id_interface"
Token Id_primitive (IdentAttributes _ _) -> "Id_primitive"
Token Id_qualified (IdentAttributes _ _) -> "Id_qualified"
-- special operators
Token SymDot (IdentAttributes _ _) -> "SymDot"
Token SymMinus (IdentAttributes _ _) -> "SymMinus"
Token SymMinusDot (IdentAttributes _ _) -> "SymMinusDot"
-- pragmas
Token PragmaLanguage NoAttributes -> "PragmaLanguage"
Token PragmaOptions (OptionsAttributes _ toolArgs_) -> "PragmaOptions " ++ show toolArgs_
Token PragmaHiding NoAttributes -> "PragmaHiding"
Token PragmaEnd NoAttributes -> "PragmaEnd"
-- comments
Token LineComment (StringAttributes svalue _) -> "LineComment " ++ show svalue
Token NestedComment (StringAttributes svalue _) -> "NestedComment " ++ show svalue
-- end-of-file token
Token EOF NoAttributes -> "EOF"
-- else
_ -> show t
{- |
Module : $Header$
Description : Generating List of Tokens and Positions
Copyright : (c) 2015 - 2016, Katharina Rahf
2015 - 2016, Björn Peemöller
2015 - 2016, Jan Tikovsky
This module defines a function for writing the list of tokens
and positions of a Curry source module into a separate file.
-}
module TokenStream (source2token) where
import Control.Monad.Writer (liftIO)
import Data.List (intercalate)
import System.FilePath (replaceExtension)
import Curry.Base.Ident (ModuleIdent)
import Curry.Base.Monad (CYIO, liftCYM, failMessages)
import Curry.Base.Position (Position (..))
import Curry.Base.Pretty (text)
import Curry.Files.Filenames (addCurrySubdirModule)
import Curry.Files.PathUtils (readModule)
import Curry.Syntax ( Module (..)
, Token (..), Category (..), Attributes (..)
, lexSource, parseHeader, patchModuleId
)
import Base.Messages (message)
import CompilerOpts (Options (..))
import CurryBuilder (findCurry)
-- |Write list of positions and tokens into a file.
-- TODO: To get the name of the module its header is getting parsed.
-- This should be improved because there shouldn't be done any
-- parsing when extracting only the TokenStream.
source2token :: Options -> String -> CYIO ()
source2token opts s = do
srcFile <- findCurry opts s
mModule <- liftIO (readModule srcFile)
case mModule of
Nothing -> failMessages [message $ text $ "Missing file: " ++ srcFile]
Just src -> do
posToks <- liftCYM (lexSource srcFile src)
header <- liftCYM (parseHeader srcFile src)
let Module _ mid _ _ _ = patchModuleId srcFile header
outFile = tokenFile (optUseSubdir opts) mid srcFile
liftIO $ writeFile outFile (showTokenStream posToks)
tokenFile :: Bool -> ModuleIdent -> FilePath -> FilePath
tokenFile useSubdir m fn = replaceExtension
(addCurrySubdirModule useSubdir m fn)
".token"
-- Show a list of 'Position' and 'Token' tuples.
-- The list is split into one tuple on each line to increase readability.
showTokenStream :: [(Position, Token)] -> String
showTokenStream [] = "[]\n"
showTokenStream ts = "[ " ++ intercalate "\n, " (map showPT ts) ++ "\n]\n"
where showPT (p, t) = "(" ++ showPosition p ++ ", " ++ showToken t ++ ")"
-- show 'Position' as "(line, column)"
showPosition :: Position -> String
showPosition p = "(" ++ show (line p) ++ ", " ++ show (column p) ++ ")"
-- |Show tokens and their value if needed
showToken :: Token -> String
-- literals
showToken (Token CharTok a) = "CharTok" +++ showAttributes a
showToken (Token IntTok a) = "IntTok" +++ showAttributes a
showToken (Token FloatTok a) = "FloatTok" +++ showAttributes a
showToken (Token StringTok a) = "StringTok" +++ showAttributes a
-- identifiers
showToken (Token Id a) = "Id" +++ showAttributes a
showToken (Token QId a) = "QId" +++ showAttributes a
showToken (Token Sym a) = "Sym" +++ showAttributes a
showToken (Token QSym a) = "QSym" +++ showAttributes a
-- punctuation symbols
showToken (Token LeftParen _) = "LeftParen"
showToken (Token RightParen _) = "RightParen"
showToken (Token Semicolon _) = "Semicolon"
showToken (Token LeftBrace _) = "LeftBrace"
showToken (Token RightBrace _) = "RightBrace"
showToken (Token LeftBracket _) = "LeftBracket"
showToken (Token RightBracket _) = "RightBracket"
showToken (Token Comma _) = "Comma"
showToken (Token Underscore _) = "Underscore"
showToken (Token Backquote _) = "Backquote"
-- layout
showToken (Token VSemicolon _) = "VSemicolon"
showToken (Token VRightBrace _) = "VRightBrace"
-- reserved keywords
showToken (Token KW_case _) = "KW_case"
showToken (Token KW_data _) = "KW_data"
showToken (Token KW_do _) = "KW_do"
showToken (Token KW_else _) = "KW_else"
showToken (Token KW_external _) = "KW_external"
showToken (Token KW_fcase _) = "KW_fcase"
showToken (Token KW_foreign _) = "KW_foreign"
showToken (Token KW_free _) = "KW_free"
showToken (Token KW_if _) = "KW_if"
showToken (Token KW_import _) = "KW_import"
showToken (Token KW_in _) = "KW_in"
showToken (Token KW_infix _) = "KW_infix"
showToken (Token KW_infixl _) = "KW_infixl"
showToken (Token KW_infixr _) = "KW_infixr"
showToken (Token KW_let _) = "KW_let"
showToken (Token KW_module _) = "KW_module"
showToken (Token KW_newtype _) = "KW_newtype"
showToken (Token KW_of _) = "KW_of"
showToken (Token KW_then _) = "KW_then"
showToken (Token KW_type _) = "KW_type"
showToken (Token KW_where _) = "KW_where"
-- reserved operators
showToken (Token At _) = "At"
showToken (Token Colon _) = "Colon"
showToken (Token DotDot _) = "DotDot"
showToken (Token DoubleColon _) = "DoubleColon"
showToken (Token Equals _) = "Equals"
showToken (Token Backslash _) = "Backslash"
showToken (Token Bar _) = "Bar"
showToken (Token LeftArrow _) = "LeftArrow"
showToken (Token RightArrow _) = "RightArrow"
showToken (Token Tilde _) = "Tilde"
showToken (Token Bind _) = "Bind"
showToken (Token Select _) = "Select"
-- special identifiers
showToken (Token Id_as _) = "Id_as"
showToken (Token Id_ccall _) = "Id_ccall"
showToken (Token Id_forall _) = "Id_forall"
showToken (Token Id_hiding _) = "Id_hiding"
showToken (Token Id_interface _) = "Id_interface"
showToken (Token Id_primitive _) = "Id_primitive"
showToken (Token Id_qualified _) = "Id_qualified"
-- special operators
showToken (Token SymDot _) = "SymDot"
showToken (Token SymMinus _) = "SymMinus"
showToken (Token SymMinusDot _) = "SymMinusDot"
-- pragmas
showToken (Token PragmaLanguage _) = "PragmaLanguage"
showToken (Token PragmaOptions a) = "PragmaOptions" +++ showAttributes a
showToken (Token PragmaHiding _) = "PragmaHiding"
showToken (Token PragmaEnd _) = "PragmaEnd"
-- comments
showToken (Token LineComment a) = "LineComment" +++ showAttributes a
showToken (Token NestedComment a) = "NestedComment" +++ showAttributes a
-- end-of-file token
showToken (Token EOF _) = "EOF"
showAttributes :: Attributes -> String
showAttributes NoAttributes = ""
showAttributes (CharAttributes c _) = show c
showAttributes (IntAttributes i _) = show i
showAttributes (FloatAttributes f _) = show f
showAttributes (StringAttributes s _) = show s
showAttributes (IdentAttributes m i) = intercalate "." (m ++ [i])
showAttributes (OptionsAttributes t a) = show t ++ ' ' : show a
-- Concatenate two 'String's with a smart space in between,
-- which is only added if both 'String's are non-empty
(+++) :: String -> String -> String
[] +++ t = t
s +++ [] = s
s +++ t = s ++ ' ' : t
......@@ -19,7 +19,7 @@ import Curry.Base.Monad (runCYIO)
import Base.Messages
import Files.CymakePath (cymakeGreeting, cymakeVersion)
import Html.CurryHtml (source2html)
import Token.TokenStream (source2token)
import TokenStream (source2token)
import CurryBuilder (buildCurry)
import CompilerOpts (Options (..), CymakeMode (..), getCompilerOpts, usage)
......@@ -37,11 +37,11 @@ cymake (prog, opts, files, errs)
| not $ null errs = badUsage prog errs
| null files = badUsage prog ["no input files"]
| mode == ModeHtml =
runCYIO (mapM_ (source2html opts) files) >>= okOrAbort
runCYIO (mapM_ (source2html opts) files) >>= okOrAbort
| mode == ModeToken =
runCYIO (mapM_ (source2token opts) files) >>= okOrAbort
| otherwise =
runCYIO (mapM_ (buildCurry opts) files) >>= okOrAbort
runCYIO (mapM_ (buildCurry opts) files) >>= okOrAbort
where
mode = optMode opts
warnOpts = optWarnOpts opts
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment