From c75bdaddd20ab1ba82453cb4d53c413066c38d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20D=C3=A9trez?= Date: Fri, 4 Nov 2016 11:33:29 +0100 Subject: [PATCH] [#176] Haskell: Store node position in the AST When using bnfc to generate haskell code with the --functor option, the parser will no store the begining position of the corresponding code in each node. This might be useful, for instance, to report errors. Note that this is possibly a larger change than it had to be because I tried something new: instead of generating code by putting together strings (or the slightly fancier PrettyPrint.Doc), I refactored the code that makes happy production to generate an AST which is then pretty-printed. Both the AST types and the pretty printer being of course generated by BNFC itself! This seems like an obvious thing to do but for some reason we never did. The main advantage is that it makes much better use of the type checker (whene everything is a string, it doesn't mean much that the type checking passes...). In addition, you get much cleaner code by getting rid of all the separators, keywords, symbols and parentheses that are now inserted by the pretty printer. --- source/BNFC.cabal | 3 + source/src/BNFC/Backend/Haskell/AbsHappy.hs | 32 ++ source/src/BNFC/Backend/Haskell/CFtoHappy.hs | 416 +++++++++++------- source/src/BNFC/Backend/Haskell/Happy.cf | 31 ++ source/src/BNFC/Backend/Haskell/PrintHappy.hs | 119 +++++ .../BNFC/Backend/Haskell/CFtoHappySpec.hs | 191 ++++++++ 6 files changed, 632 insertions(+), 160 deletions(-) create mode 100644 source/src/BNFC/Backend/Haskell/AbsHappy.hs create mode 100644 source/src/BNFC/Backend/Haskell/Happy.cf create mode 100644 source/src/BNFC/Backend/Haskell/PrintHappy.hs diff --git a/source/BNFC.cabal b/source/BNFC.cabal index b3d9b1f4..6a963f22 100644 --- a/source/BNFC.cabal +++ b/source/BNFC.cabal @@ -118,6 +118,9 @@ Executable bnfc BNFC.Backend.Haskell.MkSharedString BNFC.Backend.Haskell.HsOpts BNFC.Backend.Haskell.Utils + BNFC.Backend.Haskell.AbsHappy + BNFC.Backend.Haskell.PrintHappy + -- Profile BNFC.Backend.HaskellProfile BNFC.Backend.HaskellProfile.CFtoHappyProfile diff --git a/source/src/BNFC/Backend/Haskell/AbsHappy.hs b/source/src/BNFC/Backend/Haskell/AbsHappy.hs new file mode 100644 index 00000000..d8f22d1f --- /dev/null +++ b/source/src/BNFC/Backend/Haskell/AbsHappy.hs @@ -0,0 +1,32 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +module BNFC.Backend.Haskell.AbsHappy where + +-- Haskell module generated by the BNF converter + + +import Data.Data (Data,Typeable) +import GHC.Generics (Generic) +newtype Ident = Ident String + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) +newtype Terminal = Terminal String + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) +data Production = P Ident Type [RightHandSide] + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) + +data RightHandSide = Rhs [Symbol] Expression + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) + +data Symbol = NonTerm Ident | QuotedTerm Terminal | IdentTerm Ident + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) + +data Expression + = EIdent Ident + | EPair Expression Expression + | EApp Expression Expression + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) + +data Type + = TIdent Ident | TPair Type Type | TList Type | TApp Type Type + deriving (Eq, Ord, Show, Read, Data, Typeable, Generic) + diff --git a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs index 3814579f..7e4688f3 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoHappy.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoHappy.hs @@ -17,21 +17,18 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA -} -module BNFC.Backend.Haskell.CFtoHappy (cf2HappyS, convert) where +module BNFC.Backend.Haskell.CFtoHappy where + +import Data.String (IsString(..)) import BNFC.CF import BNFC.Backend.Common.StrUtils (escapeChars) -import BNFC.Backend.Haskell.Utils (parserName, catToType) ---import Lexer -import Data.Char +import BNFC.Backend.Haskell.Utils (parserName) +import qualified BNFC.Backend.Haskell.AbsHappy as Happy +import BNFC.Backend.Haskell.PrintHappy (printTree) +import Data.Char (isLower) import BNFC.Options (HappyMode(..)) import BNFC.PrettyPrint --- Type declarations - -type Rules = [(NonTerminal,[(Pattern,Action)])] -type Pattern = String -type Action = String -type MetaVar = String -- default naming @@ -58,13 +55,16 @@ cf2HappyS = cf2Happy cf2Happy name absName lexName errName mode byteStrings functor cf = unlines [header name absName lexName errName mode byteStrings, - render $ declarations mode (allEntryPoints cf), + -- directives + render $ declarations mode, + render $ vcat $ map (mkParserName functor) (allEntryPoints cf), tokens (cfTokens cf), - specialToks cf, + render $ nest 2 $ specialToks functor cf, delimiter, - specialRules byteStrings cf, - render $ prRules functor (rulesForHappy absName functor cf), - finalize byteStrings cf] + -- Productions + render $ productions absName byteStrings functor cf, + -- optional module trailer + finalize byteStrings functor cf] -- construct the header. header :: String -> String -> String -> String -> HappyMode -> Bool -> String @@ -83,23 +83,30 @@ header modName absName lexName errName mode byteStrings = unlines ] -- | The declarations of a happy file. --- >>> declarations Standard [Cat "A", Cat "B", ListCat (Cat "B")] --- %name pA A --- %name pB B --- %name pListB ListB +-- >>> declarations Standard -- -- no lexer declaration -- %monad { Err } { thenM } { returnM } -- %tokentype {Token} -declarations :: HappyMode -> [Cat] -> Doc -declarations mode ns = vcat - [ vcat $ map generateP ns - , case mode of +declarations :: HappyMode -> Doc +declarations mode = vcat + [ case mode of Standard -> "-- no lexer declaration" GLR -> "%lexer { myLexer } { Err _ }", "%monad { Err } { thenM } { returnM }", "%tokentype" <+> braces (text tokenName) ] - where generateP n = "%name" <+> parserName n <+> text n' - where n' = identCat n + +-- | Generate the parser name directive +-- See: https://www.haskell.org/happy/doc/html/sec-directives.html#sec-parser-name +-- Example: +-- %name pExp Exp +-- +-- In case we use a functor, we need to generate internal names: +-- %name pExp_internal Exp +mkParserName :: Bool -> Cat -> Doc +mkParserName functor cat = + "%name" <+> parserName cat <> (if functor then "_internal" else "") + <+> text (identCat cat) + -- The useless delimiter symbol. delimiter :: String @@ -119,117 +126,127 @@ tokens toks = "%token\n" ++ prTokens toks convert :: String -> Doc convert = quotes . text . escapeChars -rulesForHappy :: String -> Bool -> CF -> Rules -rulesForHappy absM functor cf = map mkOne $ ruleGroups cf + +-- ------------------------------------------------------------------------- -- +-- Productions +-- ------------------------------------------------------------------------- -- + +productions :: String -> Bool -> Bool -> CF -> Doc +productions absModule byteStrings functor cf = + vcat $ map (text . printTree) + (specialRules byteStrings functor cf + ++ + map (mkProduction absModule functor reversibles) rulesForNonInternalCats) where - mkOne (cat,rules) = (cat, map (constructRule absM functor reversibles) rules) + rulesForNonInternalCats = [ x | x@(_, _:_) <- ruleGroups cf] reversibles = cfgReversibleCats cf --- | For every non-terminal, we construct a set of rules. A rule is a sequence --- of terminals and non-terminals, and an action to be performed --- >>> constructRule "Foo" False [] (Rule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")]) --- ("Exp '+' Exp","Foo.EPlus $1 $3") --- --- If we're using functors, it adds an void value: --- >>> constructRule "Foo" True [] (Rule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")]) --- ("Exp '+' Exp","Foo.EPlus () $1 $3") --- --- List constructors should not be prefixed by the abstract module name: --- >>> constructRule "Foo" False [] (Rule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))]) --- ("A ',' ListA","(:) $1 $3") --- >>> constructRule "Foo" False [] (Rule "(:[])" (ListCat (Cat "A")) [Left (Cat "A")]) --- ("A","(:[]) $1") --- --- Coercion are much simpler: --- >>> constructRule "Foo" True [] (Rule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"]) --- ("'(' Exp ')'","$2") --- --- As an optimization, a pair of list rules [C] ::= "" | C k [C] is --- left-recursivized into [C] ::= "" | [C] C k. --- This could be generalized to cover other forms of list rules. --- >>> constructRule "Foo" False [ListCat (Cat "A")] (Rule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))]) --- ("ListA A ','","flip (:) $1 $2") --- --- Note that functors don't concern list constructors: --- >>> constructRule "Abs" True [ListCat (Cat "A")] (Rule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))]) --- ("ListA A ','","flip (:) $1 $2") -constructRule :: String -> Bool -> [Cat] -> Rule -> (Pattern,Action) -constructRule absName functor revs r0@(Rule fun cat _) = (pattern, action) + +-- | Generate an expression that represent the semantic value of a non-terminal +-- according to a given rule. +action :: String -> Bool -> [Cat] -> Rule -> Happy.Expression +action absName functor reversibles r@(Rule label cat _) = + if functor then Happy.EPair position node else node where - (pattern,metavars) = generatePatterns revs r - action | isCoercion fun = unwords metavars - | isConsFun fun && elem cat revs = unwords ("flip" : fun : metavars) - | isNilCons fun = unwords (underscore fun : metavars) - | functor = unwords (underscore fun : "()" : metavars) - | otherwise = unwords (underscore fun : metavars) - r | isConsFun (funRule r0) && elem (valCat r0) revs = revSepListRule r0 - | otherwise = r0 - underscore f | isConsFun f || isNilCons f = f - | isDefinedRule f = absName ++ "." ++ f ++ "_" - | otherwise = absName ++ "." ++ f - --- Generate patterns and a set of metavariables indicating --- where in the pattern the non-terminal - -generatePatterns :: [Cat] -> Rule -> (Pattern,[MetaVar]) -generatePatterns revs r = case rhsRule r of - [] -> ("{- empty -}",[]) - its -> (unwords (map mkIt its), metas its) - where - mkIt i = case i of - Left c -> identCat c - Right s -> render (convert s) - metas its = [revIf c ('$': show i) | (i,Left c) <- zip [1 ::Int ..] its] - revIf c m = if not (isConsFun (funRule r)) && elem c revs - then "(reverse " ++ m ++ ")" - else m -- no reversal in the left-recursive Cons rule itself - --- We have now constructed the patterns and actions, --- so the only thing left is to merge them into one string. - --- | --- >>> prRules False [(Cat "Expr", [("Integer", "EInt $1"), ("Expr '+' Expr", "EPlus $1 $3")])] --- Expr :: { Expr } --- Expr : Integer { EInt $1 } | Expr '+' Expr { EPlus $1 $3 } --- --- if there's a lot of cases, print on several lignes: --- >>> prRules False [(Cat "Expr", [("Abcd", "Action"), ("P2", "A2"), ("P3", "A3"), ("P4", "A4"), ("P5","A5")])] --- Expr :: { Expr } --- Expr : Abcd { Action } --- | P2 { A2 } --- | P3 { A3 } --- | P4 { A4 } --- | P5 { A5 } --- --- >>> prRules False [(Cat "Internal", [])] -- nt has only internal use --- --- --- The functor case: --- >>> prRules True [(Cat "Expr", [("Integer", "EInt () $1"), ("Expr '+' Expr", "EPlus () $1 $3")])] --- Expr :: { (Expr ()) } --- Expr : Integer { EInt () $1 } | Expr '+' Expr { EPlus () $1 $3 } --- --- A list with coercion: in the type signature we need to get rid of the --- coercion --- >>> prRules True [(ListCat (CoercCat "Exp" 2), [("Exp2", "(:[]) $1"), ("Exp2 ',' ListExp2","(:) $1 $3")])] --- ListExp2 :: { [Exp ()] } --- ListExp2 : Exp2 { (:[]) $1 } | Exp2 ',' ListExp2 { (:) $1 $3 } -prRules :: Bool -> Rules -> Doc -prRules functor = vcat . map prOne + position = getPosition rhs' + node | isCoercion label = head metavars + | otherwise = foldl Happy.EApp constructor metavars + constructor + | isConsFun label && elem cat reversibles = Happy.EApp "flip" (fromString label) + | isNilCons label = fromString label + | functor = Happy.EApp label' position + | otherwise = label' + label' | isDefinedRule label = fromString (absName ++ "." ++ label ++ "_") + | otherwise = fromString (absName ++ "." ++ label) + metavars = do + (i, Left cat) <- zip [1..] rhs' + return $ + (if not (isConsFun label) && elem cat reversibles + then Happy.EApp "reverse" else id) + $ (if functor then Happy.EApp "snd" else id) + (mkMetaVariable i) + Rule _ _ rhs' | isConsFun label && elem cat reversibles = revSepListRule r + | otherwise = r + + +-- | This function is used when bnfc is called with the --functor option. +-- It generates the code that computes the current node position from the +-- positions stored in the meta-variables. +getPosition :: [Either Cat String] -> Happy.Expression +getPosition rhs = case rhs of + [] -> Happy.EIdent (Happy.Ident "Nothing") + (Left _:_) -> Happy.EApp "fst" metavar + (Right _:_) -> Happy.EApp "Just" (Happy.EApp "tokenLineCol" metavar) + where + metavar = mkMetaVariable 1 + +-- | Make a happy meta-variable, e.g. $2 +mkMetaVariable :: Int -> Happy.Expression +mkMetaVariable = Happy.EIdent . Happy.Ident . ("$" ++) . show + +-- Generate a pattern for the given rule. A pattern is a list of terminals and +-- non-terminals that goes in the right-hand side of a happy production. +mkPattern :: [Cat] -> Rule -> [Happy.Symbol] +mkPattern reversibles r@(Rule label cat _) = case rhs' of + [] -> [] + items -> [mkSymbol i | i <- items] + where + mkSymbol (Left cat) = Happy.NonTerm (mkNonTerminal cat) + mkSymbol (Right s) = Happy.QuotedTerm (mkTerminal s) + Rule _ _ rhs' | isConsFun label && elem cat reversibles = revSepListRule r + | otherwise = r + +mkNonTerminal :: Cat -> Happy.Ident +mkNonTerminal = Happy.Ident . identCat + + +-- | Create a terminal identifier for a bnfc terminal, i.e. a literal string +-- in a rule. +mkTerminal :: String -> Happy.Terminal +mkTerminal s = Happy.Terminal $ "'" ++ escapeChars s ++ "'" + +-- | Create a terminal identifier for a toker category +mkTokenTerminal :: String -> Happy.Symbol +mkTokenTerminal cat = Happy.IdentTerm $ Happy.Ident $ case cat of + "Ident" -> "L_ident" + "String" -> "L_quoted" + "Integer" -> "L_integ" + "Double" -> "L_doubl" + "Char" -> "L_charac" + _ -> "L_" ++ cat + +-- | Builds the return type of a production for a given category. +mkProductionType :: Bool -> Cat -> Happy.Type +mkProductionType functor cat + | functor = Happy.TPair maybeIntIntT (catToType (Just maybeIntIntT) cat) + | otherwise = catToType Nothing cat where - type' = catToType (if functor then Just "()" else Nothing) - prOne (_,[]) = empty -- nt has only internal use - prOne (nt,(p,a):ls) = - hsep [ nt', "::", "{", type' nt, "}" ] - $$ nt' <+> sep (pr ":" (p, a) : map (pr "|") ls) - where - nt' = text (identCat nt) - pr pre (p,a) = hsep [pre, text p, "{", text a , "}"] + maybeT = Happy.TIdent (Happy.Ident "Maybe") + intT = Happy.TIdent (Happy.Ident "Int") + maybeIntIntT = Happy.TApp maybeT (Happy.TPair intT intT) + catToType :: Maybe Happy.Type -> Cat -> Happy.Type + catToType _ InternalCat = error "Can't create a haskell type for internal category" + catToType t (ListCat c) = Happy.TList (catToType t c) + catToType (Just _) c@(TokenCat _) = catToType Nothing c + catToType (Just t) c = Happy.TApp (catToType Nothing c) t + catToType Nothing c = Happy.TIdent $ Happy.Ident $ show $ normCat c + +-- | Generate a Happy Production for the given set of rules +mkProduction :: String -> Bool -> [Cat] -> (Cat, [Rule]) -> Happy.Production +mkProduction absMod functor reversibles (cat, rules) = + Happy.P + (mkNonTerminal cat) + (mkProductionType functor cat) + (map mkRhs rules) + where + mkRhs rule = Happy.Rhs + (mkPattern reversibles rule) + (action absMod functor reversibles rule) -- Finally, some haskell code. -finalize :: Bool -> CF -> String -finalize byteStrings cf = unlines $ +finalize :: Bool -> Bool -> CF -> String +finalize byteStrings functor cf = unlines $ [ "{", "\nreturnM :: a -> Err a", @@ -244,12 +261,18 @@ finalize byteStrings cf = unlines $ " [Err _] -> \" due to lexer error\"", " t:_ -> \" before `\" ++ " ++ stringUnpack ++ "(prToken t) ++ \"'\"", "", - "myLexer = tokens" + "myLexer = tokens", + "", + if functor + then render $ vcat $ map mkParserFun (allEntryPoints cf) + else "" ] ++ definedRules cf ++ [ "}" ] where stringUnpack | byteStrings = "BS.unpack" | otherwise = "id" + mkParserFun cat = + parserName cat <+> "=" <+> "(>>= return . snd)" <+> "." <+> parserName cat <> "_internal" definedRules cf = [ mkDef f xs e | FunDef f xs e <- cfgPragmas cf ] @@ -267,35 +290,108 @@ definedRules cf = [ mkDef f xs e | FunDef f xs e <- cfgPragmas cf ] -- Markus's modifs 11/02/2002 -- GF literals -specialToks :: CF -> String -specialToks cf = unlines (map aux (literals cf)) - where aux cat = - case show cat of - "Ident" -> "L_ident { PT _ (TV $$) }" - "String" -> "L_quoted { PT _ (TL $$) }" - "Integer" -> "L_integ { PT _ (TI $$) }" - "Double" -> "L_doubl { PT _ (TD $$) }" - "Char" -> "L_charac { PT _ (TC $$) }" - own -> "L_" ++ own ++ " { PT _ (T_" ++ own ++ " " ++ posn ++ ") }" - where - posn = if isPositionCat cf cat then "_" else "$$" - -specialRules :: Bool -> CF -> String -specialRules byteStrings cf = unlines $ - map aux (literals cf) +specialToks :: Bool -> CF -> Doc +specialToks functor cf = vsep (map aux (literals cf)) + where aux cat = specialTok cat (isPositionCat cf cat || functor) + + +-- | Generate a Happy terminal symbol declaration for the BNFC token rules, +-- both built-in and user defined. +-- >>> specialTok (TokenCat "Ident") False +-- L_ident {PT _ (TV $$)} +-- +-- >>> specialTok (TokenCat "Foo") False +-- L_Foo {PT _ (T_Foo $$)} +-- +-- The boolean argument tells us if we need to keep the position, in which case +-- we cannot use the Happy $$ magic projection. +-- >>> specialTok (TokenCat "Bar") True +-- L_Bar {PT _ (T_Bar _)} +-- +-- Note that we might also need to keep the position for built-in tokens: +-- >>> specialTok (TokenCat "Integer") True +-- L_integ {PT _ (TI _)} +specialTok :: Cat -> Bool -> Doc +specialTok (TokenCat cat) keepPos = + terminalName cat <+> braces ("PT _" <+> parens (cons <+> posn)) + where + cons = case cat of + "Ident" -> "TV" + "String" -> "TL" + "Integer" -> "TI" + "Double" -> "TD" + "Char" -> "TC" + own -> "T_" <> text own + posn = if keepPos then "_" else "$$" +specialTok _ _ = error "specialTok is only for TokenCat" + + +specialRules :: Bool -> Bool -> CF -> [Happy.Production] +specialRules byteStrings functor cf = map aux (literals cf) where - aux cat = - case show cat of - "Ident" -> "Ident :: { Ident } : L_ident { Ident $1 }" - "String" -> "String :: { String } : L_quoted { "++stringUnpack++" $1 }" - "Integer" -> "Integer :: { Integer } : L_integ { (read ("++stringUnpack++" $1)) :: Integer }" - "Double" -> "Double :: { Double } : L_doubl { (read ("++stringUnpack++" $1)) :: Double }" - "Char" -> "Char :: { Char } : L_charac { (read ("++stringUnpack++" $1)) :: Char }" - own -> own ++ " :: { " ++ own ++ "} : L_" ++ own ++ " { " ++ own ++ " ("++ posn ++ "$1)}" - -- PCC: take "own" as type name? (manual says newtype) - where - posn = if isPositionCat cf cat then "mkPosToken " else "" - - stringUnpack - | byteStrings = "BS.unpack" - | otherwise = "" + aux cat = specialRule byteStrings (isPositionCat cf cat) functor cat + + +-- | Generate grammar rules for each kind of token in the LBNF grammar. +-- Rules for the built-in token types (Integer, String, Double...) return a +-- literate value in haskell (e.g. the rule for Integer will actually return +-- an integer). +specialRule :: Bool -> Bool -> Bool -> Cat -> Happy.Production +specialRule byteStrings isPositionToken keepPos c@(TokenCat cat) = Happy.P + (mkNonTerminal c) + (mkProductionType keepPos c) + [ Happy.Rhs + [mkTokenTerminal cat] + (mkTokenAction isPositionToken byteStrings keepPos cat)] + where +specialRule _ _ _ _ = error "specialRule is only for TokenCat" + + +-- | Generate the action (the expression in braces in the right-hand side of a +-- happy production) for Token categories +mkTokenAction :: Bool -> Bool -> Bool -> String -> Happy.Expression +mkTokenAction isPositionToken byteStrings functor catName = + if functor then Happy.EPair position value + else value + where + value = case catName of + "String" -> unpack (prToken metavar) + _ | catName `elem` ["Integer", "Double", "Char"] -> + Happy.EApp read (unpack (prToken metavar)) + _ -> Happy.EApp cons (mkPosToken (prToken metavar)) + -- generate a function application + app :: String -> Happy.Expression -> Happy.Expression + app f = Happy.EApp (Happy.EIdent (Happy.Ident f)) + position = app "Just" $ app "tokenLineCol" metavar + cons = Happy.EIdent (Happy.Ident catName) + read = Happy.EIdent (Happy.Ident "read") + metavar = mkMetaVariable 1 + unpack | byteStrings = app "BS.unpack" + | otherwise = id + mkPosToken | isPositionToken = app "mkPosToken" + | otherwise = id + prToken | functor = app "prToken" + | otherwise = id + +-- | Generate a terminal name for a given category name that will be used to +-- link a pattern in the %token declarations and the grammar rules. Note that +-- this is only meaningful for TokenCat. +terminalName :: String -> Doc +terminalName cat = + case cat of + "Ident" -> "L_ident" + "String" -> "L_quoted" + "Integer" -> "L_integ" + "Double" -> "L_doubl" + "Char" -> "L_charac" + own -> "L_" <> text own + + +-- | Useful to be able to use a literal string directly as an identifier in +-- an Happy AST. +-- E.g. instead of writing +-- Happy.EApp (Happy.EIdent (Happy.Ident "foo")) (Happy.EIdent (Happy.Ident "bar"") +-- We can write: +-- Happy.EApp "foo" "bar" +instance IsString Happy.Expression where + fromString = Happy.EIdent . Happy.Ident diff --git a/source/src/BNFC/Backend/Haskell/Happy.cf b/source/src/BNFC/Backend/Haskell/Happy.cf new file mode 100644 index 00000000..7d7a2b5e --- /dev/null +++ b/source/src/BNFC/Backend/Haskell/Happy.cf @@ -0,0 +1,31 @@ + +-- Production. Production ::= NonTerminal "::" Type ":" [RightHandSide] ; +-- RightHandSide. RightHandSide ::= [Symbol] { Expression } ; +-- NonTerminal. NonTerminal ::= Ident ; +-- IdentType. Type ::= Ident ; +-- PairType. Type ::= "(" Type "," Type ")" ; +-- AppType. Type ::= Type Type ; +-- Symbol. Symbol ::= NonTerminal. + + +token Terminal '"' ((char - ["\"\\"]) | ('\\' ["\"\\nt"]))* '"' ; + +P. Production ::= Ident "::" "{" Type "}" ":" [RightHandSide] ; +Rhs. RightHandSide ::= [Symbol] "{" Expression "}" ; +NonTerm. Symbol ::= Ident ; +QuotedTerm. Symbol ::= Terminal ; +IdentTerm. Symbol ::= Ident ; + +EIdent. Expression2 ::= Ident ; +EPair. Expression2 ::= "(" Expression "," Expression ")" ; +EApp. Expression1 ::= Expression1 Expression2 ; +coercions Expression 2 ; + +separator nonempty RightHandSide "|" ; +separator Symbol "" ; + +TIdent. Type2 ::= Ident ; +TPair. Type2 ::= "(" Type "," Type ")" ; +TList. Type2 ::= "[" Type "]" ; +TApp. Type1 ::= Type1 Type2 ; +coercions Type 2 diff --git a/source/src/BNFC/Backend/Haskell/PrintHappy.hs b/source/src/BNFC/Backend/Haskell/PrintHappy.hs new file mode 100644 index 00000000..d6294e0b --- /dev/null +++ b/source/src/BNFC/Backend/Haskell/PrintHappy.hs @@ -0,0 +1,119 @@ +{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} +module BNFC.Backend.Haskell.PrintHappy where + +-- pretty-printer generated by the BNF converter + +import BNFC.Backend.Haskell.AbsHappy +import Data.Char + + +-- the top-level printing method +printTree :: Print a => a -> String +printTree = render . prt 0 + +type Doc = [ShowS] -> [ShowS] + +doc :: ShowS -> Doc +doc = (:) + +render :: Doc -> String +render d = rend 0 (map ($ "") $ d []) "" where + rend i ss = case ss of + "[" :ts -> showChar '[' . rend i ts + "(" :ts -> showChar '(' . rend i ts + "{" :ts -> showChar '{' . new (i+1) . rend (i+1) ts + "}" : ";":ts -> new (i-1) . space "}" . showChar ';' . new (i-1) . rend (i-1) ts + "}" :ts -> new (i-1) . showChar '}' . new (i-1) . rend (i-1) ts + ";" :ts -> showChar ';' . new i . rend i ts + t : "," :ts -> showString t . space "," . rend i ts + t : ")" :ts -> showString t . showChar ')' . rend i ts + t : "]" :ts -> showString t . showChar ']' . rend i ts + t :ts -> space t . rend i ts + _ -> id + new i = showChar '\n' . replicateS (2*i) (showChar ' ') . dropWhile isSpace + space t = showString t . (\s -> if null s then "" else ' ':s) + +parenth :: Doc -> Doc +parenth ss = doc (showChar '(') . ss . doc (showChar ')') + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id + +concatD :: [Doc] -> Doc +concatD = foldr (.) id + +replicateS :: Int -> ShowS -> ShowS +replicateS n f = concatS (replicate n f) + +-- the printer class does the job +class Print a where + prt :: Int -> a -> Doc + prtList :: Int -> [a] -> Doc + prtList i = concatD . map (prt i) + +instance Print a => Print [a] where + prt = prtList + +instance Print Char where + prt _ s = doc (showChar '\'' . mkEsc '\'' s . showChar '\'') + prtList _ s = doc (showChar '"' . concatS (map (mkEsc '"') s) . showChar '"') + +mkEsc :: Char -> Char -> ShowS +mkEsc q s = case s of + _ | s == q -> showChar '\\' . showChar s + '\\'-> showString "\\\\" + '\n' -> showString "\\n" + '\t' -> showString "\\t" + _ -> showChar s + +prPrec :: Int -> Int -> Doc -> Doc +prPrec i j = if j prPrec i 0 (concatD [prt 0 id, doc (showString "::"), doc (showString "{"), prt 0 type_, doc (showString "}"), doc (showString ":"), prt 0 righthandsides]) + +instance Print RightHandSide where + prt i e = case e of + Rhs symbols expression -> prPrec i 0 (concatD [prt 0 symbols, doc (showString "{"), prt 0 expression, doc (showString "}")]) + prtList _ [x] = (concatD [prt 0 x]) + prtList _ (x:xs) = (concatD [prt 0 x, doc (showString "|"), prt 0 xs]) +instance Print Symbol where + prt i e = case e of + NonTerm id -> prPrec i 0 (concatD [prt 0 id]) + QuotedTerm terminal -> prPrec i 0 (concatD [prt 0 terminal]) + IdentTerm id -> prPrec i 0 (concatD [prt 0 id]) + prtList _ [] = (concatD []) + prtList _ (x:xs) = (concatD [prt 0 x, prt 0 xs]) +instance Print Expression where + prt i e = case e of + EIdent id -> prPrec i 2 (concatD [prt 0 id]) + EPair expression1 expression2 -> prPrec i 2 (concatD [doc (showString "("), prt 0 expression1, doc (showString ","), prt 0 expression2, doc (showString ")")]) + EApp expression1 expression2 -> prPrec i 1 (concatD [prt 1 expression1, prt 2 expression2]) + +instance Print Type where + prt i e = case e of + TIdent id -> prPrec i 2 (concatD [prt 0 id]) + TPair type_1 type_2 -> prPrec i 2 (concatD [doc (showString "("), prt 0 type_1, doc (showString ","), prt 0 type_2, doc (showString ")")]) + TList type_ -> prPrec i 2 (concatD [doc (showString "["), prt 0 type_, doc (showString "]")]) + TApp type_1 type_2 -> prPrec i 1 (concatD [prt 1 type_1, prt 2 type_2]) + + diff --git a/source/test/BNFC/Backend/Haskell/CFtoHappySpec.hs b/source/test/BNFC/Backend/Haskell/CFtoHappySpec.hs index 979d1155..0a105c61 100644 --- a/source/test/BNFC/Backend/Haskell/CFtoHappySpec.hs +++ b/source/test/BNFC/Backend/Haskell/CFtoHappySpec.hs @@ -1,16 +1,207 @@ module BNFC.Backend.Haskell.CFtoHappySpec where +import Control.Monad (forM_) +import Data.String (IsString(..)) + import Test.Hspec import Text.PrettyPrint (render) import BNFC.Backend.Haskell.CFtoHappy +import BNFC.Backend.Haskell.AbsHappy as Happy +import BNFC.Backend.Haskell.PrintHappy (printTree) +import BNFC.CF rendersTo a b = render a `shouldBe` b +shouldGenerate a b = printTree a `shouldBe` b spec = do + + describe "directives" $ do + describe "mkParserName" $ do + + it "base case" $ + mkParserName False (Cat "A") `rendersTo` "%name pA A" + it "list case" $ + mkParserName False (ListCat (Cat "A")) `rendersTo` "%name pListA ListA" + it "functor case" $ + mkParserName True (Cat "A") `rendersTo` "%name pA_internal A" + describe "convert" $ do it "quotes backslashes" $ convert "\\" `rendersTo` "'\\\\'" it "quotes backslashes as part of a longer string" $ convert "/\\" `rendersTo` "'/\\\\'" + + describe "getPosition" $ do + + it "returns 'Nothing' when the rule rhs is empty" $ + getPosition [] `shouldGenerate` "Nothing" + + it "uses `tokenLineCol` if the first item in rhs is a terminal" $ + getPosition [Right "bar"] `shouldGenerate` "Just (tokenLineCol $1)" + + it "uses `fst` if the item is a non-terminal" $ + getPosition [Left (Cat "Foo")] `shouldGenerate` "fst $1" + + describe "action" $ do + let addRule = Rule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")] + + it "generates a simple action" $ + action "Abs" False [] addRule `shouldGenerate` "Abs.EPlus $1 $3" + + it "adds the position when using functons" $ + action "Abs" True [] addRule `shouldGenerate` "(fst $1, Abs.EPlus (fst $1)(snd $1)(snd $3))" + + let listA = ListCat (Cat "A") + listB = ListCat (Cat "B") + rule = Rule "Foo" (Cat "L") [Left listA, Right "+", Left listB] + + it "applies the reverse function for list that need reversing" $ + action "Abs" False [listB] rule `shouldGenerate` "Abs.Foo $1 (reverse $3)" + + it "applies snd and reverse in the right order" $ + action "Abs" True [listB] rule + `shouldGenerate` "(fst $1, Abs.Foo (fst $1)(snd $1)(reverse (snd $3)))" + + it "doesn't prefix list constructors with the abstract module name" $ + action "Foo" False [] (Rule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))]) + `shouldGenerate` "(:) $1 $3" + + it "doesn't add position information to list constructors" $ + action "Foo" True [] (Rule "(:)" (ListCat (Cat "A")) [Left (Cat "A"), Right",", Left (ListCat (Cat "A"))]) + `shouldGenerate` "(fst $1, (:) (snd $1)(snd $3))" + + it "Generates action for coercions" $ + action "Foo" True [] (Rule "_" (Cat "Exp") [Right "(", Left (Cat "Exp"), Right ")"]) + `shouldGenerate` "(Just (tokenLineCol $1), snd $2)" + + it "flips the arguments for reversible lists" $ + let catA = Cat "A" + listA = ListCat catA + rule = Rule "(:)" listA [Left catA, Right",", Left listA] + in action "Foo" False [listA] rule `shouldGenerate` "flip (:) $1 $2" + + describe "mkNonTerminal" $ do + + it "generates a non-terminal for a given category" $ + mkNonTerminal (Cat "Foo") `shouldBe` "Foo" + + it "prepends List to list categories" $ + mkNonTerminal (ListCat (Cat "Foo")) `shouldBe` "ListFoo" + + it "includes the precedence level" $ + mkNonTerminal (CoercCat "Foo" 3) `shouldBe` "Foo3" + + describe "mkTokenTerminal" $ do + + it "generates special legacy names for token categories" $ + sequence_ + [ mkTokenTerminal "Integer" `shouldBe` IdentTerm "L_integ" + , mkTokenTerminal "Double" `shouldBe` IdentTerm "L_doubl" + , mkTokenTerminal "String" `shouldBe` IdentTerm "L_quoted" + , mkTokenTerminal "Char" `shouldBe` IdentTerm "L_charac" + , mkTokenTerminal "Ident" `shouldBe` IdentTerm "L_ident" + , mkTokenTerminal "MyTok" `shouldBe` IdentTerm "L_MyTok" + ] + + describe "mkTerminal" $ do + + it "escapes backslashes" $ + mkTerminal "\\" `shouldBe` "'\\\\'" + + it "escapes backslashes as part of a longer string" $ + mkTerminal "/\\" `shouldBe` "'/\\\\'" + + it "escapes single quotes" $ + mkTerminal "/\\" `shouldBe` "'/\\\\'" + + describe "mkPattern" $ do + + it "generates a simple pattern" $ + mkPattern [] (Rule "EPlus" (Cat "Exp") [Left (Cat "Exp"), Right "+", Left (Cat "Exp")]) + `shouldBe` [NonTerm "Exp", QuotedTerm "'+'", NonTerm "Exp"] + + it "reverses reversible lists" $ + let catA = Cat "A" + listA = ListCat catA + rule = Rule "(:)" listA [Left catA, Right ":", Left listA] + in mkPattern [listA] rule + `shouldBe` [NonTerm "ListA", NonTerm "A", QuotedTerm "':'"] + + describe "mkProductionType" $ do + + it "returns a single ident matching the name of the category if not using functor" $ + mkProductionType False (Cat "Exp") `shouldBe` TIdent "Exp" + + it "returns a list type for list categories" $ + mkProductionType False (ListCat (Cat "Exp")) `shouldBe` TList (TIdent "Exp") + + let typeMaybeIntInt = TApp (TIdent "Maybe") (TPair (TIdent "Int") (TIdent "Int")) + + it "returns a more complex type when using functor" $ + mkProductionType True (Cat "Exp") + `shouldBe` TPair typeMaybeIntInt (TApp (TIdent "Exp") typeMaybeIntInt) + + it "returns a more complex type when using functor and a list cat" $ + mkProductionType True (ListCat (Cat "Exp")) + `shouldBe` TPair typeMaybeIntInt (TList (TApp (TIdent "Exp") typeMaybeIntInt)) + + it "when using a functor skips the type arguments for token categories" $ + mkProductionType True (TokenCat "Ident") + `shouldBe` TPair typeMaybeIntInt (TIdent "Ident") + + it "testing the build-in tokens" $ + forM_ ["String", "Double", "Char", "Integer", "Ident"] $ \x -> + mkProductionType False (TokenCat x) `shouldBe` TIdent (Ident x) + + describe "mkTokenAction" $ do + + context "base case" $ do + it "String" $ + mkTokenAction False False False "String" `shouldBe` EIdent "$1" + it "Integer" $ + mkTokenAction False False False "Integer" `shouldBe` EApp (EIdent "read") (EIdent "$1") + it "Double" $ + mkTokenAction False False False "Double" `shouldBe` EApp (EIdent "read") (EIdent "$1") + it "Char" $ + mkTokenAction False False False "Char" `shouldBe` EApp (EIdent "read") (EIdent "$1") + it "Ident" $ + mkTokenAction False False False "Ident" `shouldBe` EApp (EIdent "Ident") (EIdent "$1") + it "user-defined" $ + mkTokenAction False False False "MyTok" `shouldBe` EApp (EIdent "MyTok") (EIdent "$1") + it "position token rule" $ + mkTokenAction True False False "MyTok" `shouldGenerate` "MyTok (mkPosToken $1)" + + context "using bytestrings" $ do + it "String" $ + mkTokenAction False True False "String" `shouldGenerate` "BS.unpack $1" + it "Integer" $ + mkTokenAction False True False "Integer" `shouldGenerate` "read (BS.unpack $1)" + it "Double" $ + mkTokenAction False True False "Double" `shouldGenerate` "read (BS.unpack $1)" + it "Char" $ + mkTokenAction False True False "Char" `shouldGenerate` "read (BS.unpack $1)" + it "Ident" $ + mkTokenAction False True False "Ident" `shouldBe` EApp (EIdent "Ident") (EIdent "$1") + it "user-defined" $ + mkTokenAction False True False "MyTok" `shouldBe` EApp (EIdent "MyTok") (EIdent "$1") + it "position token rule" $ + mkTokenAction True True False "MyTok" `shouldGenerate` "MyTok (mkPosToken $1)" + + context "using functor" $ do + it "String" $ + mkTokenAction False False True "String" + `shouldGenerate` "(Just (tokenLineCol $1), prToken $1)" + it "Integer" $ + mkTokenAction False False True "Integer" + `shouldGenerate` "(Just (tokenLineCol $1), read (prToken $1))" + it "user-defined" $ + mkTokenAction False False True "MyTok" + `shouldGenerate` "(Just (tokenLineCol $1), MyTok (prToken $1))" + +instance IsString Happy.Ident where + fromString = Happy.Ident + +instance IsString Happy.Terminal where + fromString = Happy.Terminal