Commit 18780ba6 authored by Michael Hanus

addtypes packaged

*~
.cpm
.curry
{
"name": "addtypes",
"version": "0.0.1",
"author": "Bernd Brassel <bbr@informatik.uni-kiel.de>",
"maintainer": "Michael Hanus <mh@informatik.uni-kiel.de>",
"synopsis": "A tool to add missing type signatures in a Curry program",
"dependencies": {
},
"description":
"This package contains a tool which adds missing type signatures
to top-level operations in a Curry module.
Moreover, it contains a library to process strings containing
Curry source code and classify them into a few standard categories.",
"exportedModules": [ "AddTypes", "CurryStringClassifier" ],
"executable": { "name": "curry-addtypes",
"main": "AddTypes"
},
"source": {
"git": "https://git.ps.informatik.uni-kiel.de/curry-packages/addtypes.git",
"tag": "$version"
}
}
------------------------------------------------------------------
-- A tool to add all those type signatures you didn't bother to
-- write while developing the program.
--
-- @author Bernd Brassel, with changes by Michael Hanus
-- @version November 2016
--
-- Possible extensions: Use type synonyms to reduce annotations
------------------------------------------------------------------
{-# OPTIONS_CYMAKE -Wno-incomplete-patterns #-}
module AddTypes(main,addTypeSignatures) where
import AbstractCurry.Types
import AbstractCurry.Files
import AbstractCurry.Pretty
import AllSolutions
import CurryStringClassifier
import Distribution (stripCurrySuffix)
import FileGoodies
import List
import Pretty
import System (exitWith, system, getArgs)
-- The tool is rather simple: it uses Curry's facilities for
-- meta-programming to read the program in the form defined
-- in the AbstractCurry module.
-- The libraries for meta-programming provide operations to read
-- AbstractCurry programs in typed as well as in untyped form.
-- By comparing the results of these two operations, we are able to
-- distinguish the inferred types from those given by the programmer.
--
-- addtypes makes use of the CurryStringClassifier, cf. function addTypes.
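-- For illustration (a sketch; the concrete type depends on what the
-- compiler infers, here assumed to be Int): an operation defined without
-- a signature, such as
--     double x = x + x
-- is reported with the marker type "Prelude.untyped" by readUntypedCurry
-- but with its inferred type by readCurry, so comparing both reads tells
-- addtypes to insert the signature
--     double :: Int -> Int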
--- addtypes gets its argument, the Curry file to add type signatures to,
--- from the command line.
main :: IO ()
main = do
args <- getArgs
case args of
["-h"] -> printUsage
["--help"] -> printUsage
["-?"] -> printUsage
[fname] -> do
let progname = stripCurrySuffix fname
writeWithTypeSignatures progname
putStrLn $ "Signatures added.\nA backup of the original " ++
"file has been written to "++progname++".ORG.curry"
_ -> printUsage >> exitWith 1
printUsage :: IO ()
printUsage = putStrLn $ unlines
[ "A tool to add missing type signatures to top-level operations"
, ""
, "Usage:"
, ""
, " curry addtypes <Curry program>"
]
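-- Example invocation for a (hypothetical) module MyModule, assuming the
-- package's executable has been installed as `curry-addtypes`
-- (cf. package.json):
--     > curry-addtypes MyModule
--     Signatures added.
--     A backup of the original file has been written to MyModule.ORG.curry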
--- The given file is read three times: a) typed, to get all the necessary
--- type information, b) untyped, to find out which of the types were
--- specified by the user, and c) as a plain string to which the signatures
--- are added. Before adding anything, addtypes writes a backup
--- to <given filename>.ORG.curry
writeWithTypeSignatures :: String -> IO ()
writeWithTypeSignatures progname = do
system $ "cp -p "++progname++".curry "++progname++".ORG.curry"
newprog <- addTypeSignatures progname
writeFile (progname++".curry") newprog
addTypeSignatures :: String -> IO String
addTypeSignatures progname = do
typedProg <- readCurry progname
untypedProg <- readUntypedCurry progname
progLines <- readFile (progname++".curry")
mbprog <- getOneSolution -- enforce reading of all files before returning
(\p -> p =:= unscan (addTypes (scan progLines)
(getTypes typedProg untypedProg)))
system $ "rm -f "++progname++".acy "++progname++".uacy"
maybe (error "AddTypes: can't add type signatures") return mbprog
--- retrieve the functions without type signatures together with their types
getTypes :: CurryProg -> CurryProg -> [(String,CTypeExpr)]
getTypes (CurryProg _ _ _ funcDecls1 _) (CurryProg _ _ _ funcDecls2 _)
= getTypesFuncDecls funcDecls1 funcDecls2
where
getTypesFuncDecls [] [] = []
getTypesFuncDecls (CFunc name _ _ t1 _:fs1) (CFunc _ _ _ t2 _:fs2)
| isUntyped t2 = (snd name,t1) : getTypesFuncDecls fs1 fs2
| otherwise = getTypesFuncDecls fs1 fs2
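-- For example (a sketch, assuming the inferred type is Int -> Int): if the
-- module defines
--     inc x = x + 1
-- without a signature, getTypes yields
--     [("inc", CFuncType (CTCons ("Prelude","Int") [])
--                        (CTCons ("Prelude","Int") []))]
-- whereas operations that already carry a signature are skipped.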
--- addtypes implements a simple algorithm to decide where to add type
--- information: find the first line which contains the function name
--- on the left-hand side and insert the type annotation before that line.
--- The problem with this algorithm is that it might get confused by
--- comments. This is where the Curry string classifier comes in.
--- After using CurryStringClassifier.scan, the function addTypes only
--- has to process "Code" tokens and can be sure that there will be no
--- confusion with comments, strings, or characters within the program.
addTypes :: Tokens -> [(String,CTypeExpr)] -> Tokens
addTypes [] _ = []
addTypes (ModuleHead s:ts) fts = ModuleHead s : (addTypes ts fts)
addTypes (SmallComment s:ts) fts = SmallComment s : (addTypes ts fts)
addTypes (BigComment s:ts) fts = BigComment s : (addTypes ts fts)
addTypes (Text s:ts) fts = Text s : (addTypes ts fts)
addTypes (Letter s:ts) fts = Letter s : (addTypes ts fts)
addTypes (Code s:ts) fts = Code newS : newTs
where
newS = addTypesCode s newFts fts
newTs = if null newFts then ts else addTypes ts newFts
newFts = x where x free
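-- For instance (sketch), if a pending entry ("square", t) with t
-- representing Int -> Int is still in the list, a Code token containing
-- the line
--     square x = x * x
-- is rewritten to
--     square :: Int -> Int
--     square x = x * x
-- and the entry is dropped from the list passed back via newFts.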
--- Within a given code segment, insert the signatures for all functions
--- defined in it and return the new code plus the list of functions not
--- yet inserted (via the logical variable newFts).
addTypesCode :: [Char] -> [([Char],CTypeExpr)] -> [([Char],CTypeExpr)] -> [Char]
addTypesCode code [] [] = code
addTypesCode code newFts ((f,t):fts)
| null code = (newFts=:=((f,t):fts)) &> []
| otherwise
= case lhs of
[] -> head remainder
: addTypesCode (tail remainder) newFts ((f,t):fts)
' ':_ -> line ++ addTypesCode remainder newFts ((f,t):fts)
_ -> if defines f lhs
then pretty 78 (ppSig $ normalize t) ++ "\n" ++
line ++ addTypesCode remainder newFts fts
else line ++ addTypesCode remainder newFts ((f,t):fts)
where
(line,remainder) = break (=='\n') code
(lhs,_) = break (=='=') line
printf = if all (flip elem infixIDs) f then '(':f++")" else f
ppSig texp = nest 2 $
sep [ text printf
, align $ doubleColon <+> ppCTypeExpr defaultOptions texp]
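-- Note: names consisting only of infix symbols are parenthesized in the
-- generated signature, e.g. a pending entry for an operator "+++" is
-- printed as
--     (+++) :: ...
-- and the whole signature is pretty-printed to a line width of 78.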
--- name type variables a, b, c, ..., z, t0, t1, ...
toTVar :: Int -> CTypeExpr
toTVar n | n<26 = CTVar (n,[chr (97+n)])
| otherwise = CTVar (n,"t"++show (n-26))
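-- e.g. toTVar 0 = CTVar (0,"a"), toTVar 25 = CTVar (25,"z"),
--      toTVar 26 = CTVar (26,"t0")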
--- test for functions not typed by the programmer
isUntyped :: CTypeExpr -> Bool
isUntyped typeexpr
= case typeexpr of
(CTCons (mod,name) []) -> name == "untyped" && mod == "Prelude"
_ -> False
--- Normalization renames type variables from left to right beginning
--- with 0 and replaces variables occurring only once with "_".
normalize :: CTypeExpr -> CTypeExpr
normalize t | varNames 0 (tvars t newT) = newT where newT free
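-- e.g. a type of the form  b -> a -> a  (with arbitrary variable indices)
-- is normalized to  _ -> a -> a : the variable occurring only once becomes
-- "_", the repeated one gets the first fresh name "a".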
--- retrieve all variables contained in a type expression and simultaneously
--- build a new type expression with logical variables for the type variables
tvars :: CTypeExpr -> CTypeExpr -> [(Int,CTypeExpr)]
tvars (CTVar (i,_)) m = [(i,m)]
tvars (CTCons n args) (CTCons n' args')
| n=:=n' = concat (dualMap tvars args args')
tvars (CFuncType t1 t2) (CFuncType t1' t2')
= tvars t1 t1' ++ tvars t2 t2'
--- give names to a list of type variables, depending on whether they
--- are singletons or not
varNames :: Int -> [(_,CTypeExpr)] -> Success
varNames _ [] = success
varNames n ((i,v):ivs)
| null is = (v=:=(CTVar (0,"_"))) &> (varNames n others)
| otherwise = (giveName (toTVar n) (v:map snd is)) &> (varNames (n+1) others)
where
(is,others) = partition (\ (i',_) -> i==i') ivs
giveName _ [] = success
giveName name (x:xs) = name=:=x & giveName name xs
--- map over two lists simultaneously. We can't use zip because the second
--- argument here is a logical variable.
dualMap :: (a -> b -> c) -> [a] -> [b] -> [c]
dualMap _ [] [] = []
dualMap f (x:xs) (y:ys) = f x y:dualMap f xs ys
--- A left-hand side defines a function named f if it starts leftmost
--- (i.e., is not indented) and contains f among its symbols.
defines :: [Char] -> [Char] -> Bool
defines f lhs
| null ts = False
| head lhs == ' ' = False
| otherwise = elem f ts
where
ts = symbols lhs
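-- e.g. defines "f" "f x [y,z]" = True
--      defines "f" " f x"      = False   (indented, hence not a definition)
--      defines "f" "g x"       = False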
--- delimiters between terms on left hand sides
delimiters :: String
delimiters = " ([{,}])"
--- these characters form infix operator names
infixIDs :: String
infixIDs = "~!@#$%^&*+-=<>?./|\\:"
--- divide a left-hand side into the list of symbols it contains,
--- e.g. symbols "f x [y,z]" = ["f","x","y","z"]
symbols :: [Char] -> [[Char]]
symbols lhs = syms [] lhs
where
maybeSym t = if null t then [] else [t]
syms s [] = maybeSym s
syms s (x:xs)
| elem x delimiters
= maybeSym s ++ syms [] (dropWhile (flip elem delimiters) xs)
| otherwise
= syms (s++[x]) xs
----------------------------------------------------------------------
--- The Curry string classifier is a simple tool to process strings containing
--- Curry source code. The source string is classified into the following
--- categories:
---
--- * moduleHead - module interface, imports, operators
---
--- * code - the part where the actual program is defined
---
--- * big comment - parts enclosed in {- ... -}
---
--- * small comment - from "--" to the end of a line
---
--- * text - a string, i.e. text enclosed in "..."
---
--- * letter - the given string is the representation of a character
---
--- * meta - containing information for meta programming
---
--- For an example of how to use the state scanner, cf. addtypes, the tool
--- that adds missing type signatures to a given program.
---
--- @author Bernd Brassel
--- @version April 2005
--- @category meta
----------------------------------------------------------------------
module CurryStringClassifier
(Tokens,Token(..), scan, plainCode, unscan,
isSmallComment, isBigComment, isComment, isText, isLetter,
isCode, isModuleHead, isMeta, readScan,testScan)
where
import Char(isDigit,isSpace)
--- The different categories to classify the source code.
data Token = SmallComment String
| BigComment String
| Text String
| Letter String
| Code String
| ModuleHead String
| Meta String
type Tokens = [Token]
--- test for category "SmallComment"
isSmallComment x = case x of
SmallComment _ -> True
_ -> False
--- test for category "BigComment"
isBigComment x = case x of
BigComment _ -> True
_ -> False
--- test if given token is a comment (big or small)
isComment x = isSmallComment x || isBigComment x
--- test for category "Text" (String)
isText x = case x of
Text _ -> True
_ -> False
--- test for category "Letter" (Char)
isLetter x = case x of
Letter _ -> True
_ -> False
--- test for category "Code"
isCode x = case x of
Code _ -> True
_ -> False
--- test for category "ModuleHead", ie imports and operator declarations
isModuleHead x = case x of
ModuleHead _ -> True
_ -> False
--- test for category "Meta", ie between {+ and +}
isMeta x = case x of
Meta _ -> True
_ -> False
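-- Helpers for transforming only the code parts of a token sequence:
-- unweaveCode splits a token list into its Code and its non-Code tokens,
-- weave interleaves two token lists alternately, and weaveIntoCode applies
-- a transformation to the Code tokens only and weaves the results back in.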
weaveIntoCode :: (Tokens -> Tokens) -> Tokens -> Tokens
weaveIntoCode f ts =
let (cs,ncs) = unweaveCode ts in weave (f cs,ncs)
unweaveCode :: Tokens -> (Tokens,Tokens)
unweaveCode [] = ([],[])
unweaveCode (t:ts) = let (cs,ncs) = unweaveCode ts in
if isCode t then (t:cs,ncs) else (cs,t:ncs)
weave xys = case xys of
([],[]) -> []
([],[y]) -> [y]
([x],[]) -> [x]
(x:xs,y:ys) -> x:y:weave (xs,ys)
--- Divides the given string into the categories described above.
--- For applications it is important to know whether a given part of
--- code is at the beginning of a line or in the middle. The state scanner
--- organizes the code in such a way that every string categorized as
--- "Code" <b>always</b> starts in the middle of a line.
scan :: [Char] -> Tokens
scan s = modHead id (stateScan 1 (Code x) x s) where x free
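-- A small example of the expected classification (whitespace is kept
-- exactly as in the input):
--     scan "f x = x -- id\n"
--       == [Code "f x = x ", SmallComment " id", Code "\n"]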
stateScan :: Int -> Token -> [Char] -> [Char] -> Tokens
stateScan _ token x "" | x=:="" = [token]
stateScan _ (Code co) x [c] | x=:=[c] = maybeCode co []
stateScan _ (Text t) x [c]
| x=:="" = if c=='\"' then [Text t]
else error "File ended while scanning string."
stateScan _ (BigComment _) x [_]
| x=:="" = error "File ended while expecting -}"
stateScan _ (Meta _) x [_]
| x=:="" = error "File ended while expecting +}"
stateScan line (Code co) x (c:c':cs)
| c=='\"' = (x=:="") &>
(maybeCode co (stateScan line (Text y) y (c':cs)))
| c=='-' && c'=='-'
= let (comment,rest) = span (/='\n') cs
in (x=:="") &> maybeCode co
(SmallComment comment :
(stateScan line (Code y) y rest))
| c=='{' && c'=='-'
= (x=:="") &> maybeCode co (stateScan line (BigComment y) y cs)
| c=='{' && c'=='+'
= (x=:="") &> maybeCode co (stateScan line (Meta y) y cs)
| c'=='\'' && elem c (infixIDs++delimiters)
= (x=:=[c]) &> maybeCode co $
case cs of
'\\':l:'\'':rest -> Letter ['\\',l] : (stateScan line (Code y) y rest)
'\\':a:b:d:'\'':rest ->
if all isDigit [a,b,d]
then Letter ['\\',a,b,d] : (stateScan line (Code y) y rest)
else error $ "Improperly terminated character found in line "
++ show line
l:'\'':rest -> Letter [l] : (stateScan line (Code y) y rest)
_ -> error $ "Improperly terminated character in line "++show line
| c=='\n'
= (x=:=c:y) &> stateScan (line+1) (Code co) y (c':cs)
| otherwise
= (x=:=c:y) &> stateScan line (Code co) y (c':cs)
where
y free
stateScan line (BigComment co) x (c:c':cs)
| c=='-' && c'=='}'
= (x=:="") &> BigComment co : (stateScan line (Code y) y cs)
| c=='\n'
= (x=:=c:y) &> stateScan (line+1) (BigComment co) y (c':cs)
| otherwise
= (x=:=c:y) &> stateScan line (BigComment co) y (c':cs)
where
y free
stateScan line (Meta co) x (c:c':cs)
| c=='+' && c'=='}'
= (x=:="") &> Meta co : (stateScan line (Code y) y cs)
| c=='\n'
= (x=:=c:y) &> stateScan (line+1) (Meta co) y (c':cs)
| otherwise
= (x=:=c:y) &> stateScan line (Meta co) y (c':cs)
where
y free
stateScan line (Text t) x (c:c':cs)
| c == '\"' = (x=:="") &> Text t : (stateScan line (Code y) y (c':cs))
| c == '\\' = (x=:=c:c':y) &> stateScan line (Text t) y cs
| elem c toBeEscaped = error $ "unescaped "++c:" encountered in line "
++ show line
| otherwise = (x=:=c:y) &> stateScan line (Text t) y (c':cs)
where
y free
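-- modHead and modHeadInLine post-process the scanner output: roughly, they
-- collect the leading lines that begin with one of the keywords listed in
-- 'headers' below into ModuleHead tokens; comments interrupt the collection,
-- which resumes after them.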
modHead :: ([Char] -> [Char]) -> Tokens -> Tokens
modHead fs (Code c : ts)
= case break (\x->x=='\n'||x=='\r') c of
("","") -> modHead fs ts
("",n:rest) -> modHead (fs . (n:)) (Code rest : ts)
(line,"") -> if any (lineBeginsWith line) headers
then ModuleHead (fs line) : (modHeadInLine id ts)
else maybeMo (fs "") (Code c:ts)
(line,n:rest)-> if any (lineBeginsWith line) headers
then modHead (fs . (line++) . (n:)) (Code rest:ts)
else maybeMo (fs "") (Code c : ts)
modHead fs (BigComment c : ts) = maybeMo (fs "") (BigComment c : (modHead id ts))
modHead fs (SmallComment c : ts) = maybeMo (fs "") (SmallComment c : (modHead id ts))
modHead fs (Meta c : ts) = maybeMo (fs "") (Meta c : ts)
modHead fs [] = maybeMo (fs "") []
modHeadInLine :: ([Char] -> [Char]) -> Tokens -> Tokens
modHeadInLine fs [] = maybeMo (fs "") []
modHeadInLine fs (Code c : ts)
= case break (\x->x=='\n'||x=='\r') c of
(line,n:rest) -> modHead (fs . (line++) . (n:)) (Code rest : ts)
_ -> modHead fs (Code c : ts)
modHeadInLine fs (BigComment c : ts) =
maybeMo (fs "") (BigComment c : (modHeadInLine id ts))
modHeadInLine fs (SmallComment c : ts) =
maybeMo (fs "") (SmallComment c : (modHeadInLine id ts))
modHeadInLine fs (Meta c : ts) = maybeMo (fs "") (Meta c : ts)
headers :: [[Char]]
headers = [" ","import","infix","infixl","infixr","module"]
lineBeginsWith :: [Char] -> [Char] -> Bool
lineBeginsWith line s | length line < lens = False
| otherwise
= line==s ||
let (s',rest) = splitAt (length s) line
in s==s' && (null rest || isSep (head rest))
where
lens = length s
isSep :: Char -> Bool
isSep c = isSpace c || elem c infixIDs || elem c "([{"
infixIDs :: String
infixIDs = "~!@#$%^&*+-=<>?./|\\:"
delimiters :: String
delimiters = " ([{,}])"
toBeEscaped :: [Char]
toBeEscaped = "\\\n\r\t\""
maybeCode :: [Char] -> Tokens -> Tokens
maybeCode s ts = {-if s=="" then ts else-} Code s:ts
maybeMo s ts = if s=="" then ts else ModuleHead s:case ts of
Code c:ts' -> Code ('\n':c):ts'
_ -> ts
--- Yields the program code without comments
--- (the line breaks terminating small comments are kept).
plainCode :: Tokens -> String
plainCode (ModuleHead s:ts) = case ts of
Code c : ts' -> s++drop 1 c++plainCode ts'
_ -> s++plainCode ts
plainCode (Code s:ts) = s++plainCode ts
plainCode (Text s:ts) = '\"':s++'\"':plainCode ts
plainCode (Letter s:ts) = '\'':s++'\'':plainCode ts
plainCode (BigComment _:ts) = plainCode ts
plainCode (SmallComment _:ts) = plainCode ts
plainCode (Meta s:ts) = "{+"++s++"+}"++plainCode ts
plainCode [] = ""
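-- e.g. plainCode (scan "f x = x -- id\n") == "f x = x \n"
-- (the comment text is dropped, the line break following it is kept)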
--- Inverse function of scan, i.e., unscan (scan x) = x.
--- unscan is used to yield a program after changing the list of tokens.
unscan :: Tokens -> String
unscan (ModuleHead s:ts) = s++case ts of
(Code (_:c):ts') -> unscan (Code c:ts')
_ -> unscan ts
unscan (Code s:ts) = s++unscan ts
unscan (Text s:ts) = '\"':s++'\"':unscan ts
unscan (Letter s:ts) = '\'':s++'\'':unscan ts
unscan (BigComment s:ts) = "{-"++s++"-}"++unscan ts
unscan (SmallComment s:ts) = "--"++s++unscan ts
unscan (Meta s:ts) = "{+"++s++"+}"++unscan ts
unscan [] = ""
--- return the token list for a given file name
readScan :: [Char] -> IO Tokens
readScan fn = readFile fn >>= return . scan
--- test whether (unscan . scan) is identity
testScan :: [Char] -> IO ()
testScan fn = do c <- readFile fn
print (unscan (scan c)==c)
testWeave :: [Char] -> IO ()
testWeave fn = do c <- readFile fn
print (unscan (weave (unweaveCode (scan c)))==c)