From bdcc8f7cf380b8539273e6a54ba78258b606bade Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 14:54:29 +0200
Subject: [PATCH 1/8] Added Python backend for use with PLY
---
docs/user_guide.rst | 87 ++++
document/BNF_Converter_Python_Mode.html | 218 ++++++++++
source/BNFC.cabal | 9 +
source/main/Main.hs | 3 +
source/src/BNFC/Backend/Python.hs | 149 +++++++
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 382 ++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPyLex.hs | 189 ++++++++
.../Backend/Python/CFtoPyPrettyPrinter.hs | 409 ++++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPySkele.hs | 114 +++++
source/src/BNFC/Backend/Python/PyHelpers.hs | 80 ++++
source/src/BNFC/Backend/Python/RegToFlex.hs | 97 +++++
source/src/BNFC/Options.hs | 7 +
testing/src/ParameterizedTests.hs | 12 +
13 files changed, 1756 insertions(+)
create mode 100644 document/BNF_Converter_Python_Mode.html
create mode 100644 source/src/BNFC/Backend/Python.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyAbs.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPySkele.hs
create mode 100644 source/src/BNFC/Backend/Python/PyHelpers.hs
create mode 100644 source/src/BNFC/Backend/Python/RegToFlex.hs
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 041c508a..8273f1d5 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -284,3 +284,90 @@ BNFC adds the grammar name as a file extension. So if the grammar file is
named ``Calc.cf``, the lexer will be associated to the file extension
``.calc``. To associate other file extensions to a generated lexer, you need to
modify (or subclass) the lexer.
+
+Python Backend
+===============
+
+The BNF Converter's Python Backend generates a Python frontend that uses
+`PLY <https://www.dabeaz.com/ply/ply.html>`_ (Python Lex Yacc) to parse
+input into an AST (abstract syntax tree).
+
+Python 3.10 or higher is needed.
+
+Example usage: ::
+
+ bnfc --python Calc.cf
+
+
+.. list-table:: The result is a set of files:
+ :widths: 25 25
+ :header-rows: 1
+
+ * - Filename
+ - Description
+ * - bnfcPyGenCalc/Absyn.py
+ - Provides the classes for the abstract syntax.
+ * - bnfcPyGenCalc/LexTokens.py
+ - Provides PLY with the information needed to build the lexer.
+   * - bnfcPyGenCalc/ParsingDefs.py
+ - Provides PLY with the information needed to build the parser.
+ * - bnfcPyGenCalc/PrettyPrinter.py
+ - Provides printing for both the AST and the linearized tree.
+ * - genTest.py
+     - A ready-made test file that uses the generated frontend to convert input into an AST.
+ * - skele.py
+ - Provides skeleton code to deconstruct an AST, using structural pattern matching.
+
+Optionally one may with ``-m``` also create a makefile that contains the target
+"distclean" to remove the generated files.
+
+Testing the frontend
+....................
+
+It's possible to pipe input, like::
+
+ echo "(1 + 2) * 3" | python3.10 genTest.py
+
+or::
+
+ python3.10 genTest.py < file.txt
+
+and it's possible to just use an argument::
+
+ python3.10 genTest.py file.txt
+
+
+Caveats
+.......
+
+Presentation of conflicts in a grammar:
+
+  A symbol-to-unicode transformation is made for the terminals in the grammar,
+  for example from "++" to "S_43_43". This, however, obfuscates the grammar
+  information that PLY generates in the "parser.out" file. Users are hence
+  encouraged to use the Haskell backend to debug grammars and identify
+  conflicts.
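+
+  For reference, the mangled names can be reproduced with a few lines of
+  Python (a sketch mirroring the backend's internal ``toOrd`` helper, where
+  symbols get an "S" prefix and reserved words an "R" prefix)::
+
+    def to_ord(s: str) -> str:
+        return ''.join('_' + str(ord(c)) for c in s)
+
+    print('S' + to_ord('++'))   # S_43_43
+    print('R' + to_ord('int'))  # R_105_110_116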
+
+Several entrypoints:
+
+  At the top of the ParsingDefs.py file an additional rule is added that has
+  every defined entrypoint as a possible production. This may create conflict
+  warnings, as it may introduce ambiguity. The added parsing rule is therefore
+  removed by default with the statement "del p__Start" directly beneath the
+  function, and is enabled again if the user comments out the removal, as
+  shown in the sketch below.
+
+Special cases for special characters:
+
+  Using non-special characters, instead of say parentheses, when defining
+  rules may not yield the expected behaviour. With the rule below, an
+  expression such as "a1+2a" cannot be parsed, since the a's are classified
+  as reserved keywords, like "int", rather than as symbols, like "+"::
+
+ _. Exp1 ::= "a" Exp "a" ;
+
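+  The keyword matching happens inside the generated rule for identifiers,
+  roughly as follows (a sketch of the generated LexTokens.py)::
+
+    def t_Ident(t):
+        r'[A-Za-z]\w*'
+        t.type = reserved_map.get(t.value, 'Ident')
+        return t
+
+  Since "a1" matches the identifier pattern as a whole, the lexer never
+  produces the keyword "a" followed by "1".
+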
+Results from the parameterized tests:
+
+  While the Python backend generates working frontends for the example
+  grammars, the regression tests report four "failures" and six "errors".
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
new file mode 100644
index 00000000..429b5c44
--- /dev/null
+++ b/document/BNF_Converter_Python_Mode.html
@@ -0,0 +1,218 @@
+
+
+
+ BNF Converter Python Mode
+
+
+
+
+
+BNF Converter
+Python Mode
+
+By Björn Werner
+
+2024
+
+  The BNF Converter's Python Backend generates a Python frontend that uses
+  PLY (Python Lex Yacc) to parse input into an AST (abstract syntax tree).
+
+
+ BNFC on Github:
+ https://github.com/BNFC/bnfc
+
+
+ PLY homepage:
+ https://www.dabeaz.com/ply/ply.html
+
+
+ Python 3.10 or higher is needed.
+
+Usage
+
+ bnfc --python NAME.cf
+
+
+The result is a set of files:
+
+
+
+ Filename: | Description: |
+
+
+  bnfcPyGenNAME/LexTokens.py | Provides PLY with the information needed to build the lexer. |
+
+
+  bnfcPyGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+
+
+  bnfcPyGenNAME/ParsingDefs.py | Provides PLY with the information needed to build the parser. |
+
+
+  bnfcPyGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
+
+
+  genTest.py | A ready-made test file that uses the generated frontend to convert input into an AST. |
+
+
+ skele.py | Provides skeleton code to deconstruct an AST, using structural pattern matching. |
+
+
+
+Testing the frontend
+
+ The following example uses a frontend that is generated from a C-like grammar.
+
+
+ $ python3.10 genTest.py < hello.c
+
+
+ Generating LALR tables
+ Parse Successful!
+
+ [Abstract Syntax]
+ (PDefs [(DFun Type_int "main" [] [(SExp (EApp "printString" [(EString "Hello world")])), (SReturn (EInt 0))])])
+
+ [Linearized Tree]
+ int main ()
+ {
+ printString ("Hello world");
+ return 0;
+ }
+
+
+ The LALR tables are cached in a file called "parsetab.py", and a description by PLY of the grammar is stored in a file called "parser.out".
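+
+  The generated modules can also be used directly, without genTest.py (a
+  sketch, assuming a grammar named NAME with an entrypoint Exp):
+
+
+from ply.lex import lex
+from ply.yacc import yacc
+from bnfcPyGenNAME.LexTokens import *
+from bnfcPyGenNAME.ParsingDefs import *
+from bnfcPyGenNAME.PrettyPrinter import *
+
+lexer = lex.lex()
+parser = yacc(start='Exp')  # pick one of the grammar's entrypoints
+ast = parser.parse('1 + 2', lexer=lexer)
+print(printAST(ast))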
+
+The Abstract Syntax Tree
+
+  The AST is built up from instances of Python classes that use the dataclass decorator, such as:
+
+
+@dataclass
+class EAdd:
+ exp_1: Exp
+ exp_2: Exp
+ _ann_type: _AnnType = field(default_factory=_AnnType)
+
+
+ The "_ann_type" variable is a placeholder that can be used to store useful information,
+ for example type-information in order to create a type-annotated AST.
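+
+  For example, a type checker could annotate a node as follows (a sketch;
+  Type_int is one of the classes generated from the C-like grammar above):
+
+
+node._ann_type.s(Type_int())  # set; raises if a different value is already set
+print(node._ann_type.g())     # read the annotation back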
+
+Using the skeleton file
+
+  The skeleton file serves as a template, for example for creating an
+  interpreter. Two different types of matchers are generated: the first with
+  all the value categories together, and a second type where each matcher
+  covers only one individual value category, as in the example below:
+
+
+def matcherExp(exp_: Exp):
+ match exp_:
+ case EAdd(exp_1, exp_2, _ann_type):
+ # Exp "+" Exp1
+ raise Exception('EAdd not implemented')
+ case ESub(exp_1, exp_2, _ann_type):
+ ...
+
+
+  This can be modified, in order to return the sum of each evaluated
+  argument, into:
+
+
+ def matcherExp(exp_: Exp):
+ match exp_:
+ case EAdd(exp_1, exp_2, _ann_type):
+ # Exp "+" Exp1
+ return matcherExp(exp_1) + matcherExp(exp_2)
+ case ESub(exp_1, exp_2, _ann_type):
+ ...
+
+
+ The function can now be imported and used in the generated test file
+ (similarly to how the pretty printer is imported and used):
+
+
+ from skele import matcherExp
+ ...
+ print(matcherExp(ast))
+
+
+Known issues
+
+ Presentation of conflicts in a grammar:
+
+
+  A symbol-to-unicode transformation is made for the terminals in the grammar,
+  for example from "++" to "S_43_43". This, however, obfuscates the grammar
+  information that PLY generates inside the "parser.out" file. Users are hence
+  encouraged to use, say, the Haskell backend to debug their
+  grammars and identify conflicts.
+
+
+ Several entrypoints:
+
+
+  At the top of the ParsingDefs.py file an additional rule is added that has
+  every defined entrypoint as a possible production. This may create warnings
+  for conflicts if it introduces ambiguity, and warnings for unused rules if
+  the "_Start" category is not used as the entrypoint. The added parsing rule
+  is therefore removed by default with "del p__Start" beneath the function,
+  and is enabled again if the user comments out the removal.
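+
+  For a grammar with the entrypoints Exp and Stm, the added rule looks
+  roughly like this (a sketch; Exp and Stm stand in for the actual
+  entrypoints):
+
+
+def p__Start(p):
+    '''
+    _Start : Exp
+           | Stm
+    '''
+    p[0] = p[1]
+
+del p__Start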
+
+
+ Skeleton code for using lists as entrypoints:
+
+
+  Matchers for list categories, such as [Exp], are not generated in the
+  skeleton code, as they may confuse users if the grammar uses several
+  different list categories. Users are instead encouraged to use a non-list
+  entrypoint.
+
+
+ The improper way to iterate over lists, as the value category is unknown:
+
+
+ case list():
+ for ele in ast:
+ ...
+
+
+ The proper way to deconstruct lists, where we know the value category:
+
+
+ case RuleName(listexp_):
+ for exp in listexp_:
+ ...
+
+
+ Special cases for special characters
+
+
+  Using non-special characters instead of, say, parentheses when defining rules may not yield the expected
+  behaviour. With the rule below, an expression such as "a1+2a" cannot be parsed.
+
+
+ _. Exp1 ::= "a" Exp "a" ;
+
+
+ Using multiple separators
+
+
+ Using multiple separators for the same category, such as below, generates
+ Python functions with overlapping names, causing runtime errors.
+
+
+ separator Exp1 "," ;
+ separator Exp1 ";" ;
+
\ No newline at end of file
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index 7300a8d2..c232b401 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -280,6 +280,15 @@ library
BNFC.Backend.TreeSitter.CFtoTreeSitter
BNFC.Backend.TreeSitter.RegToJSReg
+ -- Python backend
+ BNFC.Backend.Python
+ BNFC.Backend.Python.CFtoPyAbs
+ BNFC.Backend.Python.CFtoPyLex
+ BNFC.Backend.Python.CFtoPyPrettyPrinter
+ BNFC.Backend.Python.RegToFlex
+ BNFC.Backend.Python.PyHelpers
+ BNFC.Backend.Python.CFtoPySkele
+
----- Testing --------------------------------------------------------------
test-suite unit-tests
diff --git a/source/main/Main.hs b/source/main/Main.hs
index 754bf268..6377611f 100644
--- a/source/main/Main.hs
+++ b/source/main/Main.hs
@@ -26,6 +26,7 @@ import BNFC.Backend.Latex
import BNFC.Backend.OCaml
import BNFC.Backend.Pygments
import BNFC.Backend.TreeSitter
+import BNFC.Backend.Python
import BNFC.CF (CF)
import BNFC.GetCF
import BNFC.Options hiding (make, Backend)
@@ -83,3 +84,5 @@ maketarget = \case
TargetPygments -> makePygments
TargetCheck -> error "impossible"
TargetTreeSitter -> makeTreeSitter
+ TargetPython -> makePython
+
\ No newline at end of file
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
new file mode 100644
index 00000000..e0a3da35
--- /dev/null
+++ b/source/src/BNFC/Backend/Python.hs
@@ -0,0 +1,149 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-
+ BNF Converter: Python main file
+ Copyright (C) 2004 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python (makePython) where
+
+import Prelude hiding ((<>))
+import System.FilePath ((</>))
+import BNFC.CF (CF, firstEntry)
+import BNFC.Options (SharedOptions, optMake, lang)
+import BNFC.Backend.Base (MkFiles, mkfile)
+import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs)
+import BNFC.Backend.Python.CFtoPyLex (cf2PyLex)
+import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
+import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
+import BNFC.Backend.Python.PyHelpers
+
+import BNFC.PrettyPrint -- For Doc
+import qualified BNFC.Backend.Common.Makefile as Makefile
+
+-- | Entrypoint for BNFC to use the Python backend.
+makePython :: SharedOptions -> CF -> MkFiles ()
+makePython opts cf = do
+ let pkgName = "bnfcPyGen" ++ name
+ let (lexerDefs, tokensPly) = cf2PyLex cf
+ let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly
+ let prettyPrinter = cf2PyPretty pkgName cf
+ let skeletonCode = cf2PySkele pkgName cf
+ mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs
+ mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
+ mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
+ mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
+ mkPyFile "skele.py" skeletonCode
+ mkPyFile "genTest.py" (pyTest pkgName cf)
+ Makefile.mkMakefile (optMake opts) $ makefile pkgName (optMake opts)
+ where
+ name :: String
+ name = lang opts
+ mkPyFile x = mkfile x comment
+
+
+-- | A makefile with distclean and clean specifically for the testsuite. No
+-- "all" is needed as bnfc has already generated the necessary Python files.
+makefile :: String -> Maybe String -> String -> Doc
+makefile pkgName optMakefileName basename = vcat
+ [
+ Makefile.mkRule "all" []
+ [ " " ]
+ , Makefile.mkRule "clean" []
+ [ "rm -f parser.out parsetab.py" ]
+ , Makefile.mkRule "distclean" [ "vclean" ] []
+ , Makefile.mkRule "vclean" []
+ [ "rm -f " ++ unwords
+ [
+ pkgName ++ "/LexTokens.py",
+ pkgName ++ "/ParsingDefs.py",
+ pkgName ++ "/Absyn.py",
+ pkgName ++ "/PrettyPrinter.py",
+ pkgName ++ "/LexTokens.py.bak",
+ pkgName ++ "/ParsingDefs.py.bak",
+ pkgName ++ "/Absyn.py.bak",
+ pkgName ++ "/PrettyPrinter.py.bak",
+ "skele.py",
+ "genTest.py",
+ "skele.py.bak",
+ "genTest.py.bak"
+ ],
+ "rm -f " ++ pkgName ++ "/__pycache__/*.pyc",
+ "rm -fd " ++ pkgName ++ "/__pycache__",
+ "rmdir " ++ pkgName,
+ "rm -f __pycache__/*.pyc",
+ "rm -fd __pycache__",
+ "rm -f parser.out parsetab.py",
+ "rm -f " ++ makefileName,
+ "rm -f " ++ makefileName ++ ".bak"
+ ]
+ ]
+ where
+ makefileName = case optMakefileName of
+ Just s -> s
+ Nothing -> "None" -- No makefile will be created.
+
+
+-- | Put string into a comment.
+comment :: String -> String
+comment x = "# " ++ x
+
+
+-- Produces the content for the testing file, genTest.py.
+pyTest :: String -> CF -> String
+pyTest pkgName cf = unlines
+ [
+ "from ply.lex import lex",
+ "from ply.yacc import yacc",
+ "import sys",
+ "from " ++ pkgName ++ ".LexTokens import *",
+ "from " ++ pkgName ++ ".ParsingDefs import *",
+ "from " ++ pkgName ++ ".PrettyPrinter import *",
+ "",
+ "",
+ "# Suggested input options:",
+ "# python3.10 genTest.py < sourcefile",
+ "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).",
+ "inputFile = None",
+ "if len(sys.argv) > 1:",
+ "\tf = open(sys.argv[1], 'r')",
+ "\tinp = f.read()",
+ "\tf.close()",
+ "\tif len(sys.argv) > 2:",
+ "\t\tinputFile = sys.argv[2]",
+ "else:",
+ "\tinp = ''",
+ "\tfor line in sys.stdin:",
+ "\t\tinp += line",
+ "",
+ "",
+ "# Customizable error handling for the parsing",
+ "def p_error(p: lex.LexToken):",
+ "\tif p is None:",
+ "\t\tprint('No rule could reduce the tokenized input')",
+ "\telse:",
+ "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')",
+ "\t\tp.lexer.syntaxError = True",
+ "",
+ "",
+ "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
+ "lexer = lex.lex()",
+ "parser = yacc(start=" ++ defaultEntry ++ ")",
+ "lexer.syntaxError = False",
+ "ast = parser.parse(inp, lexer=lexer)",
+ "if ast and not lexer.syntaxError:",
+ "\tprint('Parse Successful!\\n')",
+ "\tprint('[Abstract Syntax]')",
+ "\tprint(printAST(ast))",
+ "\tprint('\\n[Linearized Tree]')",
+ "\tlinTree = lin(ast)",
+ "\tprint(renderC(linTree))",
+ "\tprint()",
+ "else:",
+ "\tprint('Parse failed')",
+ "\tquit(1)"
+ ]
+ where
+ defaultEntry = (addCitationSigns . translateToList . show . firstEntry) cf
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
new file mode 100644
index 00000000..a4712fb0
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -0,0 +1,382 @@
+
+{-
+ BNF Converter: Python abstract syntax and parsing definitions generator
+ Copyright (C) 2024 Author: Bjorn Werner
+ Based on CFtoCAbs.hs, Copyright (C) 2004 Michael Pellauer
+-}
+
+module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
+import Data.List ( nub, intercalate )
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+import Data.Char (toLower)
+import qualified Data.List.NonEmpty as List1
+
+-- | The result is ParsingDefs.py & Absyn.py
+cf2PyAbs
+ :: String
+ -> CF -- ^ Grammar.
+ -> [(String, String)] -- Tokens to unicode mapping
+ -> (String, String) -- ParsingDefs.py, Absyn.py.
+cf2PyAbs pkgName cf tokensPly = ( unlines
+ [ "from " ++ pkgName ++ ".Absyn import *"
+ , "\n\n" ++ createCommonEntrypointDef cf
+ , "\n\n" ++ (unlines parsingDefs)
+ , if length definesParsingDefs > 0
+ then "\n\n# Parsing rules from defines"
+ else ""
+ , "\n\n" ++ unlines definesParsingDefs
+ ]
+ , "from typing import List as _List" ++
+ "\n\n# Value categories (no coercsions):" ++
+ "\n\n" ++ unlines valueCatsClasses ++
+ "\n\n" ++ placeholderVariableClass ++
+ "\n\n# Rules:" ++
+ "\n" ++ "from dataclasses import dataclass, field" ++
+ "\n\n" ++ (unlines dataClasses)
+ )
+ where
+ rules = cfgRules cf
+
+ -- To create ParsingDefs.py
+ parsingDefs :: [String]
+ parsingDefs = map (ruleToParsingDef cf tokensPly)
+ [r | r <- rules, isParsable r, not (isDefinedRule r)]
+
+ definesParsingDefs = makeDefineParsingDefs cf tokensPly
+
+ -- To create Absyn.py
+ dataClasses :: [String]
+ dataClasses = map makePythonClass
+ [ r | r <- rules, not (isDefinedRule r)
+ , not (isNilCons r)
+ , not (isCoercion r)
+ ]
+
+ rulesNoListConstructors =
+ [r | r <- (cfgRules cf), not (isNilCons r), not (isCoercion r) ]
+
+ -- Note: Custom tokens are set to inherit "str".
+ valueCatNames = nub $
+ (map (show . normCat . valCat) rulesNoListConstructors) ++
+ (map (++"(str)") (tokenNames cf)) ++
+ [ "String(str)"
+ , "Char(str)"
+ , "Ident(str)"
+ , "Integer(int)"
+ , "Double(float)"
+ ]
+
+ valueCatsClasses = map createValueCatClass valueCatNames
+
+
+placeholderVariableClass :: String
+placeholderVariableClass = unlines
+ [ "# Placeholder to add additional information to a node in the AST," ++
+ " like type information."
+ , "class _AnnType:"
+ , " def __init__(self):"
+ , " self.__v = None"
+ , ""
+ , " def s(self, val):"
+ , " if not self.__v == None:"
+ , " if self.__v != val:"
+ , " raise Exception('already has type: ' + str(self.__v)" ++
+ " + ' and tried to set to ' + str(val))"
+ , " self.__v = val"
+ , ""
+ , " def g(self):"
+ , " return self.__v"
+ , ""
+ , " def __str__(self):"
+ , " return str(self.__v.__class__)"
+ , ""
+ , " def __repr__(self):"
+ , " return str(self.__v.__class__)"
+ ]
+
+-- | Creates a parsing definition that points to all entrypoints.
+createCommonEntrypointDef :: CF -> String
+createCommonEntrypointDef cf = unlines
+ [ "def p__Start(p):"
+ , " '''"
+ , " _Start : " ++ (translateToList . show . head) cats ++
+ concat (map createCase (tail cats))
+ , " '''"
+ , " p[0] = p[1]"
+ , ""
+ , ""
+ , "# Comment the below line to enable the '_Start' entrypoint (may yield"
+ ++ " conflict warnings)."
+ , "del p__Start"
+ , ""
+ ]
+ where
+ cats = (List1.toList . allEntryPoints) cf
+
+ createCase :: Cat -> String
+ createCase c = "\n | " ++ translateToList (show c)
+
+
+-- | The value categories become abstract classes, for type hinting.
+createValueCatClass :: String -> String
+createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
+
+
+-- | Creates a parsing definition, by checking what type of rule it is and
+-- calling the corresponding make function.
+ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String
+ruleToParsingDef cf tokensPly rule
+ | isCoercion funcRStr =
+ makeParseCoercion cf tokensPly funcCat (fName, sentForm)
+ | isNilFun funcRStr =
+ makeParseNil tokensPly funcCat (fNameTranslated, sentForm)
+ | isOneFun funcRStr =
+ makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm)
+ | isConsFun funcRStr =
+ makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm)
+ | isDefinedRule rule =
+ error "Should not generate define rules in this step"
+ | otherwise =
+ makeParseFunc cf tokensPly funcCat (fName, sentForm)
+ where
+ funcRStr = funRule rule :: RString
+ fName = wpThing funcRStr :: String
+
+ funcCat = valCat rule :: Cat
+ catStr = show (valCat rule) :: String
+
+ fNameTranslated :: String
+ fNameTranslated
+ | isNilFun funcRStr = catStr
+ | otherwise = fName
+
+ sentForm = rhsRule rule :: [Either Cat String]
+
+
+-- | Make a Python class from a rule's name and production.
+makePythonClass :: Rul RFun -> String
+makePythonClass rule =
+ "@dataclass\n" ++
+ "class " ++ name ++ ":\n" ++
+ if length cats == 0 then "\tpass\n" else classBody
+ where
+ name = funName rule
+ sentForm = rhsRule rule
+ cats = lefts sentForm
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarsWithType = [render d ++ ": " ++
+ strCatToPyTyping (show (normCat c)) | (c, d) <- lefts nvCats]
+
+ classBody = unlines $ map ("\t" ++) (enumeratedVarsWithType ++
+ ["_ann_type: _AnnType = field(default_factory=_AnnType)"])
+
+
+
+-- | Creates the corresponding type hinting for some member variable.
+strCatToPyTyping :: String -> String
+strCatToPyTyping s =
+ if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
+
+
+-- | Maps a literal or token category to its Python class; Nothing for
+-- other categories, such as list categories.
+literalsToPytypeMaybe :: CF -> String -> Maybe String
+literalsToPytypeMaybe cf s = case s of
+ "Integer" -> Just "Integer"
+ "Double" -> Just "Double"
+ "Char" -> Just "Char"
+ "String" -> Just "String"
+ "Ident" -> Just "Ident"
+ _ -> if s `elem` (tokenNames cf) then Just s else Nothing
+
+
+-- | The following makeParse functions create their corresponding parsing
+-- definitions for some rule.
+makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines
+ [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")"
+
+
+makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ strP ++ "\n"
+ ]
+ where
+ strP = head (getLeftIndexes cf 1 sentForm)
+ sourceCat = (head . lefts) sentForm
+
+
+makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String
+makeParseNil tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = []\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+
+
+makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String
+makeParseOne cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+ rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
+
+
+makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseCons cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\"" ++ "\n"
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+ rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
+
+
+-- | Produces a list of the elements in the code production, where the indices
+-- match the argument categories.
+getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String]
+ -> [String]
+getLeftIndexesLists _ _ _ [] = []
+getLeftIndexesLists tokensPly cf n (Left c:ecs)
+ | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs)
+ | otherwise = ["[" ++ typedPTerm ++ "]"] ++
+ (getLeftIndexesLists tokensPly cf (n+1) ecs)
+ where
+ pTerm = "p[" ++ (show n) ++ "]"
+ typedPTerm = case literalsToPytypeMaybe cf (show c) of
+ Just s -> s ++ "(" ++ pTerm ++ ")"
+ Nothing -> pTerm
+getLeftIndexesLists tokensPly cf n (Right strOp:ecs)
+ | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs
+ | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs
+
+
+-- | In case the delimiter is "" or is not defined for the lexer, like
+-- ignored characters.
+separatorIsEmpty :: [(String, String)] -> String -> Bool
+separatorIsEmpty tokensPly strOp
+ | length strOp > 0 = case lookup strOp tokensPly of
+ Just _ -> False
+ Nothing -> True
+ | otherwise = True
+
+
+-- | Produces a list of the elements in the code production, where the indices
+-- match the argument categories.
+getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String]
+getLeftIndexes _ _ [] = []
+getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++
+ (getLeftIndexes cf (n+1) ecs)
+ where
+ pTerm = "p[" ++ (show n) ++ "]"
+ typedPTerm = case literalsToPytypeMaybe cf (show c) of
+ Just s -> s ++ "(" ++ pTerm ++ ")"
+ Nothing -> pTerm
+getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs
+
+
+-- | Produces the production in the docstring for the parsing definitions.
+prodToDocStr :: [(String, String)] -> [Either Cat String] -> String
+prodToDocStr _ [] = ""
+prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec
+prodToDocStr tokensPly (ec:ecs) =
+ ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs
+
+
+-- Converts a single element in the production.
+ecsToDocStr :: [(String, String)] -> Either Cat String -> String
+ecsToDocStr _ (Left c) = translateToList $ show c
+ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of
+ (Just s) -> s
+ Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes
+
+
+-- | Creating the parsing definitions for the defines.
+makeDefineParsingDefs :: CF -> [(String, String)] -> [String]
+makeDefineParsingDefs cf tokensPly = defFuncsPy
+ where
+ rules = cfgRules cf
+
+ definedRules :: [Rul RFun]
+ definedRules = [r | r <- rules, isDefinedRule r]
+
+ pairs :: [(Rul RFun, Define)]
+ pairs = [(dr, d) | dr <- definedRules, d <- definitions cf,
+ nameCorresponds ((wpThing . defName) d) (funName dr)]
+
+ -- Adds a number to the name to make each define separate.
+ numberedPairs = zip [1..] pairs
+ defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs
+
+
+-- | To compare names for defines. The first letter needs to be lowered, so
+-- "while" == "While".
+nameCorresponds :: String -> String -> Bool
+nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
+nameCorresponds _ _ = error "Names can't be empty"
+
+
+-- | Creates a define parsing definition.
+makeDefineParsingDef ::
+ CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String
+makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines
+ [ "def p_D" ++ (show n) ++ name ++ "(p):"
+ , "\t\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t\"\"\""
+ , "\t# " ++ show env
+ , "\tp[0] = " ++ expToDef env (defBody defi)
+ , ""
+ ]
+ where
+ name = (wpThing . defName) defi
+ translatedCat = translateToList $ (catToStr . valCat) defRule
+ sentForm = rhsRule defRule
+ indexes = getLeftIndexes cf 1 sentForm
+ args = map fst (defArgs defi)
+ env = zip args indexes
+
+
+-- | Converts the production of a define, called an expression, to a
+-- production for the parsing definition.
+expToDef :: [(String, String)] -> Exp -> String
+expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
+expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
+ expToDef env recList
+expToDef _ (App "[]" _ _) = "[]"
+expToDef env (App fName _ exps) =
+ fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
+expToDef env (Var s) = case lookup s env of
+ Just p -> p
+ Nothing -> error "Missing variable in define enviroment"
+expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
+expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
+expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
+expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
new file mode 100644
index 00000000..fd1a532a
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
@@ -0,0 +1,189 @@
+
+{-
+ BNF Converter: Python lexer generator
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where
+
+import BNFC.CF
+
+import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
+import BNFC.Backend.Python.PyHelpers
+
+
+-- | The entrypoint, returns LexTokens.py and the unicode mapping.
+cf2PyLex :: CF -> (String, [(String, String)])
+cf2PyLex cf = (, tokensPly) $ unlines
+ [ "import ply.lex as lex\n"
+ , ""
+ , createReservedMap reservedWordsEnv
+ , "# PLY tokens:\n" ++ plyTokens ++ "\n"
+ , "# PLY tokens with RegEx:"
+ , unlines plyTokensRegEx
+ , "# Literals:"
+ , plyLiterals cf
+ , "# Comments:"
+ , unlines singleComments
+ , unlines multiComments
+ , footer
+ ]
+ where
+ -- The reserved keywords and the symbols are zipped with a
+ -- unicode representation, which is needed for the parsing.
+
+ -- Reserved keywords -> [("int", "R_...")]
+ reservedWordsVar :: [String]
+ reservedWordsVar = reservedWords cf
+
+ reservedWordsEnv :: [(String, String)]
+ reservedWordsEnv =
+ zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar)
+
+ -- Symbols -> [("+", "S_43")]
+ literalsVar :: [String]
+ literalsVar = literals cf
+
+ strOps :: [String]
+ strOps = map fst (cfTokens cf)
+
+ strOpsFiltered = filterOut strOps reservedWordsVar
+ strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered
+
+ strOpsAndSymbols :: [(String, String)]
+ strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols
+
+ presentSymbols :: [String]
+ presentSymbols =
+ map addCitationSigns (strOpsFilteredSymbols ++ literalsVar)
+
+ -- Defining the variables for the lexer.
+ plyTokens =
+ "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")"
+ plyTokensRegEx = map createRegEx strOpsAndSymbols
+
+ tokensPly :: [(String, String)]
+ tokensPly = reservedWordsEnv ++ strOpsAndSymbols
+
+ -- Comments
+ (multiMatchers, singleMatchers) = comments cf
+ singleComments = map createLineCommentMatcher singleMatchers
+ multiComments = map createMultiLineCommentMatcher multiMatchers
+
+
+-- | Creates tokens for the lexer, such as "t_S_43 = r'\+'".
+createRegEx :: (String, String) -> String
+createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'"
+
+
+-- | For single-line comments
+createLineCommentMatcher :: String -> String
+createLineCommentMatcher r = unlines
+ [ "def t_C" ++ (toOrd r) ++ "(t):"
+ , "\tr'" ++ concat (map escapeChar r) ++ ".*'"
+ , "\tpass"
+ ]
+
+
+-- | For multi-line comments
+createMultiLineCommentMatcher :: (String, String) -> String
+createMultiLineCommentMatcher (s, e) = unlines
+ [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):"
+ , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'"
+ , "\tpass"
+ ]
+ where
+ escaped s = concat $ map escapeChar s
+
+
+-- | The reserved_map contains mappings for reserved keywords,
+-- such as 'int' : 'R_105_110_116'.
+createReservedMap :: [(String, String)] -> String
+createReservedMap xs = unlines
+ [ "reserved_map = {"
+ , unlines rows
+ , "}"
+ , ""
+ , "reserved = ("
+ , unlines rowsSnd
+ , ")"
+ ]
+ where
+ rows :: [String]
+ rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs]
+
+ rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs]
+
+
+-- | Creates lexer definitions for the lexer which are interpreted using
+-- the inspect module to retrieve useful information, for example:
+-- def t_String(t):
+-- r'"[^"]+"'
+-- t.type = reserved_map.get(t.value, 'String')
+-- return t
+plyLiterals :: CF -> String
+plyLiterals cf = unlines $ concat
+ [
+ ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""]
+ , ifC catChar
+ [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLexFunc "Integer" "\\d+"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"]
+ -- If there is no Ident present, we need a lexer definition for reserved
+ -- words:
+ , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0
+ then [createLexFunc "" "[A-Za-z]\\w*"]
+ else []
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ ]
+
+
+-- | Creates a Lexing definition for a Literal
+-- If no Literal name is used, this is just a reserved_map lookup.
+createLexFunc :: String -> String -> String
+createLexFunc name regex = unlines
+ [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):"
+ , "\tr'" ++ regex ++ "'"
+ , if name /= ""
+ then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')"
+ else "\tt.type = reserved_map.get(t.value)"
+ , "\treturn t"
+ ]
+
+
+-- | Adds lexer definitions to ignore whitespace, and a testing block
+-- which attempts to tokenize some input, like: python3 LexTokens.py < input
+footer :: String
+footer = unlines
+ [ "# Ignored characters:"
+ , "t_ignore = ' \\t'"
+ , ""
+ , "# Ignored token with an action associated with it:"
+ , "def t_ignore_newline(t):"
+ , "\tr'\\n+'"
+ , "\tt.lexer.lineno += t.value.count('\\n')"
+ , ""
+ , "# Error handler for illegal characters:"
+ , "def t_error(t):"
+ , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))"
+ , "\tquit()"
+ , ""
+ , "if __name__ == \"__main__\":"
+ , "\tlexer = lex.lex()"
+ , "\tlex.runmain(lexer)"
+ ]
+
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
new file mode 100644
index 00000000..20255c19
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -0,0 +1,409 @@
+
+{-
+ BNF Converter: Python pretty-printer generator
+ Copyright (C) 2024 Author: Bjorn Werner
+ Based on CFtoCPrinter.hs, Copyright (C) 2004 Michael Pellauer
+-}
+
+module BNFC.Backend.Python.CFtoPyPrettyPrinter ( cf2PyPretty ) where
+import Data.List ( intercalate, nub )
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+import BNFC.Backend.Common.StrUtils
+import qualified Data.List.NonEmpty as List1
+
+-- | Used to create PrettyPrinter.py, that contains the functionality
+-- to print the AST and the linearized tree.
+cf2PyPretty :: String -> CF -> String
+cf2PyPretty pkgName cf = unlines
+ [ "from " ++ pkgName ++ ".Absyn import *"
+ , ""
+ , makePrintAST cf
+ , ""
+ , makeListDecons cf
+ , ""
+ , makeRenderC
+ , ""
+ , makeCoercCompare cf
+ , ""
+ , makeCompareFunc
+ , ""
+ , makeLinFunc cf
+ ]
+
+
+-- | Creates the print AST function.
+makePrintAST :: CF -> String
+makePrintAST cf = concat
+ [ "def printAST(ast: object) -> list:\n"
+ , " match ast:\n"
+ , concat
+ [ ifUsedThen catInteger
+ [ " case Integer():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catDouble
+ [ " case Double():"
+ , " if ast.is_integer():"
+ , " return str(int(ast))"
+ , " else:"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catString
+ [ " case String():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catChar
+ [ " case Char():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catIdent
+ [ " case Ident():"
+ , " return '\"' + str(ast) + '\"'"
+ ]
+ ]
+ , if length (tokenNames cf) > 0
+ then unlines
+ [ " case (" ++ intercalate " | " (map (++"()") (tokenNames cf))
+ ++ "):"
+ , " return '\"' + str(ast) + '\"'"
+ ]
+ else ""
+ , " case list():\n"
+ , " return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n"
+ , "\n"
+ , " if len(vars(ast)) > 0:\n"
+ , " return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
+ , " else:\n"
+ , " return ast.__class__.__name__\n"
+ ]
+ where
+ ifUsedThen :: TokenCat -> [String] -> String
+ ifUsedThen cat ss
+ | isUsedCat cf (TokenCat cat) = unlines ss
+ | otherwise = ""
+
+
+-- Creates deconstructors for all list categories.
+makeListDecons :: CF -> String
+makeListDecons cf = unlines $ map (makeListDecon cf) listCats
+ where
+ rules = cfgRules cf
+ valCats = nub $ map valCat rules
+ listCats = [c | c <- valCats, isList c]
+
+
+-- Creates a deconstructor for some list category.
+makeListDecon :: CF -> Cat -> String
+makeListDecon cf c = concat
+ [ "def list" ++ name ++ "Decon(xs):\n"
+ , oneRuleStr
+ , nilRuleStr
+ , consRuleStr
+ , "\n"
+ ]
+ where
+ name = show $ catOfList c
+ listRulesForCat = rulesForCat cf c
+
+ nilRule = case [r | r <- listRulesForCat, isNilFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+ oneRule = case [r | r <- listRulesForCat, isOneFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+ consRule = case [r | r <- listRulesForCat, isConsFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+
+ -- List rules are of the form:
+ -- [C] ::= symbols.. C symbols.. [C]
+ -- The production, in Python, is concatenated recursively:
+ -- symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols..
+ sentFormToArgs :: Int -> [Either Cat String] -> String
+ sentFormToArgs _ [] = "[]"
+ sentFormToArgs v (Right strOp:ecss) =
+ "['" ++ escapeChars strOp ++ "'] + " ++
+ sentFormToArgs v ecss
+ sentFormToArgs v (Left _:ecss)
+ | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss
+ | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
+ sentFormToArgs (v+1) ecss
+ | otherwise = error "A list production can max have C and [C]."
+
+ nilRuleStr = case nilRule of
+ Nothing -> ""
+ Just r -> unlines
+ [ " if len(xs) == 0:"
+ , " return " ++ sentFormToArgs 0 (rhsRule r)
+ ]
+
+ oneRuleStr = case oneRule of
+ Nothing -> ""
+ Just r -> unlines
+ [ " if len(xs) == 1:"
+ , " return " ++ sentFormToArgs 0 (rhsRule r)
+ ]
+
+ consRuleStr = case consRule of
+ Nothing -> ""
+ Just r -> " return " ++ sentFormToArgs 0 (rhsRule r) ++ "\n"
+
+
+-- | Creates the renderC function, which renders a list of strings into a
+-- single string, inserting whitespace to present the language in a C-like
+-- manner.
+makeRenderC :: String
+makeRenderC = unlines
+ [ "def renderC(ss: list):"
+ , " def br(i):"
+ , " return '\\n' + ' ' * iLevel"
+ , ""
+ , " def ident(i):"
+ , " return ' ' * iLevel"
+ , ""
+ , " def removeTrailingWhitespace(tot):"
+ , " i = len(tot)"
+ , " while i > 0:"
+ , " if tot[i] == ' ':"
+ , " i -= 1"
+ , " else:"
+ , " break"
+ , ""
+ , " return tot[:i]"
+ , ""
+ , " def oneEmptyLine(tot):"
+ , " tot = tot.rstrip(' ')"
+ , " if len(tot) > 0 and tot[-1] != '\\n':"
+ , " tot += '\\n'"
+ , " tot += ident(iLevel)"
+ , " return tot"
+ , ""
+ , " tot = ''"
+ , " iLevel = 0"
+ , " for i in range(len(ss)):"
+ , " s = ss[i]"
+ , " match s:"
+ , " case '{':"
+ , " tot = oneEmptyLine(tot)"
+ , " iLevel += 1"
+ , " tot += '{' + br(iLevel)"
+ , " case ('(' | '['):"
+ , " tot += s"
+ , " case (')' | ']'):"
+ , " tot = tot.rstrip()"
+ , " tot += s + ' '"
+ , " case '}':"
+ , " iLevel -= 1"
+ , " tot = oneEmptyLine(tot)"
+ , " tot += s + br(iLevel)"
+ , " case ',':"
+ , " tot = tot.rstrip()"
+ , " tot += s + ' '"
+ , " case ';':"
+ , " tot = tot.rstrip()"
+ , " tot += s + br(iLevel)"
+ , " case '':"
+ , " tot += ''"
+ , " case ' ':"
+ , " tot += s"
+ , " case _:"
+ , " tot += s + ' '"
+ , ""
+ , " return tot"
+ ]
+
+
+-- Provides a mapping from a rule to its value category.
+makeCoercCompare :: CF -> String
+makeCoercCompare cf = concat
+ [ "cdict = {\n"
+ , unlines (map (\(fs, cs) -> " " ++ fs ++ " : '" ++ cs ++ "',") scs)
+ , "}"
+ ]
+ where
+ scs :: [(String, String)]
+ scs = [(funName r, (show . wpThing . valRCat) r) | r <- cfgRules cf,
+ not (isCoercion r), not (isNilCons r), not (isDefinedRule r)]
+
+
+-- | Creates a function that attempts to figure out if
+-- parentheses are required, for example:
+-- 1 + (2 * 3)
+-- The precedence for the addition is low, say Exp, but the multiplication
+-- has a higher precedence, say Exp1, so parentheses are needed.
+makeCompareFunc :: String
+makeCompareFunc = unlines
+ [ "def c(ast, cat: str) -> list:"
+ , " cl = ast.__class__"
+ , " if cl in cdict:"
+ , " clCat = cdict[cl]"
+ , " clCatAlphas = ''.join(filter(str.isalpha, clCat))"
+ , " catAlphas = ''.join(filter(str.isalpha, cat))"
+ , " clCatNums = ''.join(filter(str.isnumeric, clCat))"
+ , " catNums = ''.join(filter(str.isnumeric, cat))"
+ , " clCatNum = 0"
+ , " catNum = 0"
+ , " if clCatAlphas == catAlphas:"
+ , " if len(clCatNums) > 0:"
+ , " clCatNum = int(clCatNums)"
+ , " if len(catNums) > 0:"
+ , " catNum = int(catNums)"
+ , " if clCatNum < catNum:"
+ , " return ['('] + lin(ast) + [')']"
+ , " return lin(ast)"
+ ]
+
+
+-- | Returns the AST as a list of strings, which can be sent into the
+-- renderC function.
+makeLinFunc :: CF -> String
+makeLinFunc cf = unlines
+ [ "def lin(ast: object) -> list:"
+ , " match ast:"
+ , concat
+ [ ifUsedThen catInteger
+ [ " case Integer():"
+ , " return [str(ast)]"
+ ]
+ , ifUsedThen catDouble
+ [ " case Double():"
+ , " if ast.is_integer():"
+ , " return [str(int(ast))]"
+ , " else:"
+ , " return [str(ast)]"
+ ]
+ , ifUsedThen catString
+ [ " case String():"
+ , " return [ast, ' ']"
+ ]
+ , ifUsedThen catIdent
+ [ " case Ident():"
+ , " return [ast]"
+ ]
+ , ifUsedThen catChar
+ [ " case Char():"
+ , " return [ast]"
+ ]
+ ]
+ , " # skeleTokenCases:"
+ , unlines skeleTokenCases
+ , " # skeleRuleCases:"
+ , unlines skeleRuleCases
+ , -- Deals with cases where the entrypoint is say [Stm] or [Exp],
+ -- with pattern matching on the first object in the list.
+ " case " ++ "list():"
+ , " if len(ast) == 0:"
+ , " return []"
+ , " else:"
+ , " match ast[0]:"
+ , unlines listEntrypointCases
+ , " case _:"
+ , " raise Exception(ast[0].__class__.__name__, " ++
+ "'unmatched ast[0]')"
+ , " case _:"
+ , " raise Exception(str(ast.__class__) + ' unmatched')"
+ ]
+ where
+ -- Used to include standard literals, if needed.
+ ifUsedThen :: TokenCat -> [String] -> String
+ ifUsedThen cat ss
+ | isUsedCat cf (TokenCat cat) = unlines ss
+ | otherwise = ""
+
+ -- Figures out the delimiters for the separators and terminators,
+ -- to further process a deconstructed object that contains list(s).
+ rules = [r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isDefinedRule r)
+ , not (isNilCons r)
+ ]
+
+ skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
+ skeleRuleCases = map makeSkeleRuleCase rules
+
+ catEntrypointsForLists =
+ [catOfList c | c <- (List1.toList . allEntryPoints) cf, isList c]
+
+ -- The Haskell backend defaults to the production for the lowest
+ -- precedence for lists that are defined. Like ``separator Exp1 ","``.
+ lowestPrecListCats = [c | c <- catEntrypointsForLists,
+ precCat c == (minimum (map precCat
+ [c2 | c2 <- catEntrypointsForLists, normCat c == normCat c2]
+ )
+ )
+ ]
+
+ listEntrypointCases =
+ map (makeListEntrypointCase cf) lowestPrecListCats
+
+
+-- | Creates cases that check what class individual nodes might be, meaning
+-- the rule names or the token categories.
+makeListEntrypointCase :: CF -> Cat -> String
+makeListEntrypointCase cf c = concat
+ [ " case " ++ intercalate "|" constructors ++ ":\n"
+ , " return list" ++ show c ++ "Decon(ast)"
+ ]
+ where
+ constructors = if isTokenCat c
+ then [show c ++ "()"]
+ else map ((++ "()") . funName)
+ [
+ r | r <- rulesForNormalizedCat cf (normCat c),
+ not (isCoercion r),
+ not (isDefinedRule r)
+ ]
+
+
+-- Creates a case for a user defined literal, which inherits str.
+makeSkeleTokenCase :: String -> String
+makeSkeleTokenCase tokenName = concat
+ [ " case " ++ tokenName ++ "():\n"
+ , " return [ast]"
+ ]
+
+
+-- | Creates a case for some rule, with the additional information of what
+-- separator- and terminator-delimiters there are.
+makeSkeleRuleCase :: Rul RFun -> String
+makeSkeleRuleCase rule = concat
+ [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ , " # " ++ (showEcss sentForm) ++ "\n"
+ , " return " ++ if (length args > 0)
+ then (intercalate " + " args)
+ else "[]"
+ ]
+ where
+ fName = wpThing (funRule rule)
+ sentForm = rhsRule rule
+
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarNames = [render d | (c, d) <- lefts nvCats]
+
+ varNamesCommad = if length enumeratedVarNames > 0
+ then addCommas (enumeratedVarNames ++ ["_ann_type"])
+ else ""
+
+ args = ecssAndVarsToList
+ sentForm
+ enumeratedVarNames
+
+
+-- | Creates a list of a production with both terminals and non-terminals.
+ecssAndVarsToList :: [Either Cat String] -> [String] -> [String]
+ecssAndVarsToList [] _ = []
+ecssAndVarsToList (Left c:ecss) (s:ss)
+ | isList c = ["list" ++ name ++ "Decon(" ++ s ++ ")"] ++
+ ecssAndVarsToList ecss ss
+ | otherwise = ["c(" ++ s ++ ", '" ++ (show c) ++ "')"] ++
+ ecssAndVarsToList ecss ss
+ where
+ name = show $ catOfList c
+ecssAndVarsToList (Right strOp:ecss) ss =
+ ["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
new file mode 100644
index 00000000..5297fa02
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
@@ -0,0 +1,114 @@
+
+{-
+ BNF Converter: Python skeleton-code generator
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.CFtoPySkele where
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import Data.Char (toLower)
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+
+-- | Entrypoint.
+cf2PySkele :: String -> CF -> String
+cf2PySkele pkgName cf = unlines
+ [ "from ply.lex import lex"
+ , "from ply.yacc import yacc"
+ , "import sys"
+ , "from " ++ pkgName ++ ".LexTokens import *"
+ , "from " ++ pkgName ++ ".ParsingDefs import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , ""
+ , makeSkele cf
+ ]
+
+
+-- Creates first a matcher with all value categories, and underneath one
+-- matcher for each value category.
+makeSkele :: CF -> String
+makeSkele cf = unlines
+ [ "# Categories combined into one matcher"
+ , "def skeleMatcher(ast: object):"
+ , "\tmatch ast:"
+ , unlines skeleLiteralCases
+ , unlines skeleTokenCases
+ , unlines skeleRuleCases
+ , "\t\tcase _:"
+ , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')"
+ , ""
+ , "# Categories split into their own matchers"
+ , unlines matchersOnCats
+ ]
+ where
+ rules =
+ [ r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isDefinedRule r)
+ , not (isNilCons r)
+ ]
+
+ presentLiterals = ifC catInteger ++
+ ifC catDouble ++
+ ifC catString ++
+ ifC catIdent ++
+ ifC catChar
+
+ skeleLiteralCases = map makeSkeleTokenCase presentLiterals
+ skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
+ skeleRuleCases = map makeSkeleRuleCase rules
+
+ parserCats = filter (not . isList) (allParserCatsNorm cf) :: [Cat]
+ rulesfornormalizedcat = map (rulesForNormalizedCat cf) parserCats
+ parserCatsWithRules = zip parserCats rulesfornormalizedcat
+
+ matchersOnCats = map makeMatcherOnCat parserCatsWithRules
+
+ ifC :: TokenCat -> [String]
+ ifC cat = if isUsedCat cf (TokenCat cat) then [cat] else []
+
+
+-- Creates a matcher for some value category.
+makeMatcherOnCat :: (Cat, [Rul RFun]) -> String
+makeMatcherOnCat (c, rules) = unlines
+ [ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):"
+ , "\tmatch " ++ varName ++ ":"
+ , unlines cases
+ ,"\t\tcase _:"
+ ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ ]
+ where
+ varName = map toLower (show c) ++ "_"
+ cases = map makeSkeleRuleCase (filter
+ (\r -> not (isCoercion r) && not (isDefinedRule r))
+ rules)
+
+
+-- | Creates a case for some rule.
+makeSkeleRuleCase :: Rul RFun -> String
+makeSkeleRuleCase rule = concat
+ [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ , "\t\t\t# " ++ (showEcss sentForm) ++ "\n"
+ , "\t\t\traise Exception('" ++ fName ++ " not implemented')"
+ ]
+ where
+ funcRStr = funRule rule :: RString
+ fName = wpThing funcRStr :: String
+ sentForm = rhsRule rule
+
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
+
+ varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"])
+
+
+-- | Creates a case for a user-defined token.
+makeSkeleTokenCase :: String -> String
+makeSkeleTokenCase tokenName = concat
+ [ "\t\tcase " ++ tokenName ++ "():\n"
+ , "\t\t\traise Exception('not implemented')"
+ ]
+
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
new file mode 100644
index 00000000..300c1d9f
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -0,0 +1,80 @@
+
+{-
+ BNF Converter: Python backend helper functions
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.PyHelpers where
+import Data.List ( intercalate )
+import Data.Char
+import BNFC.CF
+
+
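+-- | Joins strings with ", " between the elements.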
+addCommas :: [String] -> String
+addCommas ss = intercalate ", " ss
+
+
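+-- | Wraps a string in single quotation marks.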
+addCitationSigns :: String -> String
+addCitationSigns ss = "'" ++ ss ++ "'"
+
+
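+-- | Keeps the elements of the first list that do not occur in the second.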
+filterOut :: Eq a => [a] -> [a] -> [a]
+filterOut xs ys = filter (\x -> not (elem x ys)) xs
+
+
+-- Converts every character to unicode with an underscore in front.
+toOrd :: String -> String
+toOrd s = concat (map (("_" ++) . show . ord) s)
+
+
+-- | Converts a string of underscores and unicode numbers such as "_43_43"
+-- into "++".
+toChr :: String -> String
+toChr "" = ""
+toChr xs = map chr nrs
+ where
+ nrsStr = tail $ split '_' xs :: [String]
+ nrs = map read nrsStr :: [Int]
+
+
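+-- | Splits a string at every occurrence of the given character.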
+split :: Char -> String -> [String]
+split c s = split' c s ""
+
+
+split' :: Char -> String -> String -> [String]
+split' _ [] ps = [ps]
+split' c (s:ss) ps
+ | c == s = [ps] ++ split' c ss ""
+ | otherwise = split' c ss (ps ++ [s])
+
+
+-- Converts [Cat] into ListCat, which is mainly used in the parser.
+translateToList :: String -> String
+translateToList s
+ | strIsList s = "List" ++ (tail $ init s)
+ | otherwise = s
+
+
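+-- | Whether a category string is bracketed, such as "[Exp]".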
+strIsList :: String -> Bool
+strIsList s = head s == '[' && last s == ']'
+
+
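+-- | Returns the first Right value in the list, if any.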
+firstRight :: [Either a b] -> Maybe b
+firstRight [] = Nothing
+firstRight (Left _:es) = firstRight es
+firstRight (Right r:_) = Just r
+
+
+-- Retrieves the first character from strings such as "[Stm]" or "Stm".
+firstAlpha :: String -> Char
+firstAlpha s
+ | strIsList s = head $ tail s
+ | otherwise = head s
+
+
+-- | Converts a production into a string, for comments.
+showEcss :: [Either Cat String] -> String
+showEcss [] = ""
+showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss)
+showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
+
diff --git a/source/src/BNFC/Backend/Python/RegToFlex.hs b/source/src/BNFC/Backend/Python/RegToFlex.hs
new file mode 100644
index 00000000..37e357b4
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/RegToFlex.hs
@@ -0,0 +1,97 @@
+{-# LANGUAGE LambdaCase #-}
+
+{-
+ The name RegToFlex remains from the C backend (2024), as the code is
+ almost identical.
+-}
+
+module BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) where
+
+-- modified from pretty-printer generated by the BNF converter
+
+import Data.Char (ord, showLitChar)
+import qualified Data.List as List
+import BNFC.Abs (Reg(..), Identifier(Identifier))
+import BNFC.Backend.Common (flexEps)
+
+
+-- the top-level printing method
+printRegFlex :: Reg -> String
+printRegFlex = render . prt 0
+
+
+-- you may want to change render and parenth
+render :: [String] -> String
+render = rend (0::Int) where
+ rend i ss = case ss of
+ "[" :ts -> cons "[" $ rend i ts
+ "(" :ts -> cons "(" $ rend i ts
+ t : "," :ts -> cons t $ space "," $ rend i ts
+ t : ")" :ts -> cons t $ cons ")" $ rend i ts
+ t : "]" :ts -> cons t $ cons "]" $ rend i ts
+ t :ts -> space t $ rend i ts
+ _ -> ""
+ cons s t = s ++ t
+ space t s = if null s then t else t ++ s
+
+
+parenth :: [String] -> [String]
+parenth ss = ["("] ++ ss ++ [")"]
+
+
+-- the printer class does the job
+class Print a where
+ prt :: Int -> a -> [String]
+
+
+prPrec :: Int -> Int -> [String] -> [String]
+prPrec i j = if j < i then parenth else id
+
+
+instance Print Identifier where
+ prt _ (Identifier (_, i)) = [i]
+
+
+instance Print Reg where
+ prt i = \case
+ RSeq reg0 reg -> prPrec i 2 (concat [prt 2 reg0 , prt 3 reg])
+ RAlt reg0 reg -> prPrec i 1 (concat [prt 1 reg0 , ["|"] , prt 2 reg])
+
+ -- Flex does not support set difference. See link for valid patterns.
+ -- https://westes.github.io/flex/manual/Patterns.html#Patterns
+ -- RMinus reg0 reg -> prPrec i 1 (concat [prt 2 reg0 , ["#"] , prt 2 reg])
+ RMinus reg0 REps -> prt i reg0 -- REps is identity for set difference
+ RMinus RAny (RChar c) -> [ concat [ "[^", escapeChar c, "]" ] ]
+ RMinus RAny (RAlts str) -> [ concat [ "[^", concatMap escapeChar str, "]" ] ]
+ -- FIXME: unicode inside brackets [...] is not accepted by flex
+ -- FIXME: maybe we could add cases for char - RDigit, RLetter etc.
+ RMinus _ _ -> error "Flex does not support general set difference"
+
+ RStar reg -> concat [ prt 3 reg , ["*"] ]
+ RPlus reg -> concat [ prt 3 reg , ["+"] ]
+ ROpt reg -> concat [ prt 3 reg , ["?"] ]
+ REps -> [ flexEps ]
+ RChar c -> [ escapeChar c ]
+ -- Unicode characters cannot be inside [...] so we use | instead.
+ RAlts str -> prPrec i 1 $ List.intersperse "|" $ map escapeChar str
+ -- RAlts str -> concat [["["], prt 0 $ concatMap escapeChar str, ["]"]]
+ RSeqs str -> prPrec i 2 $ map escapeChar str
+ RDigit -> [ "\\d" ]
+ RLetter -> [ "[A-Za-z]" ] -- add underscore ?
+ RUpper -> [ "[A-Z]" ]
+ RLower -> [ "[a-z]" ]
+ RAny -> [ "." ]
+
+
+-- | Handle special characters in regular expressions.
+escapeChar :: Char -> String
+escapeChar c
+ | c `elem` reserved = '\\':[c]
+ | let x = ord c, x >= 256 = [c]
+ -- keep unicode characters -- "\x" ++ showHex x ""
+ | otherwise = showLitChar c ""
+ where
+ reserved :: String
+ reserved = " '$+-*=<>[](){}!?.,;:^~|&%#/\\$_@\""
+
+
diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs
index ac5fdbf6..74a1c757 100644
--- a/source/src/BNFC/Options.hs
+++ b/source/src/BNFC/Options.hs
@@ -64,6 +64,7 @@ data Target = TargetC | TargetCpp | TargetCppNoStl
| TargetHaskell | TargetHaskellGadt | TargetLatex
| TargetJava | TargetOCaml | TargetPygments
| TargetTreeSitter
+ | TargetPython
| TargetCheck
deriving (Eq, Bounded, Enum, Ord)
@@ -83,6 +84,7 @@ instance Show Target where
show TargetPygments = "Pygments"
show TargetTreeSitter = "Tree-sitter"
show TargetCheck = "Check LBNF file"
+ show TargetPython = "Python"
-- | Which version of Alex is targeted?
data AlexVersion = Alex3
@@ -261,6 +263,7 @@ printTargetOption = ("--" ++) . \case
TargetOCaml -> "ocaml"
TargetPygments -> "pygments"
TargetTreeSitter -> "tree-sitter"
+ TargetPython -> "python"
TargetCheck -> "check"
printAlexOption :: AlexVersion -> String
@@ -314,6 +317,8 @@ targetOptions =
"Output a Python lexer for Pygments"
, Option "" ["tree-sitter"] (NoArg (\o -> o {target = TargetTreeSitter}))
"Output grammar.js file for use with tree-sitter"
+ , Option "" ["python"] (NoArg (\ o -> o{target = TargetPython }))
+ "Output Python code for use with PLY"
, Option "" ["check"] (NoArg (\ o -> o{target = TargetCheck }))
"No output. Just check input LBNF file"
]
@@ -530,6 +535,7 @@ instance Maintained Target where
TargetOCaml -> True
TargetPygments -> True
TargetTreeSitter -> True
+ TargetPython -> True
TargetCheck -> True
instance Maintained AlexVersion where
@@ -661,4 +667,5 @@ translateOldOptions = mapM $ \ o -> do
, ("--ghc" , "--generic")
, ("--deriveGeneric" , "--generic")
, ("--deriveDataTypeable" , "--generic")
+ , ("-python" , "--python")
]
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index ce0c945c..13d85e7d 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -421,6 +421,10 @@ parameters = concat
, javaParams { tpName = "Java (with jflex and line numbers)"
, tpBnfcOptions = ["--java", "--jflex", "-l"] }
]
+ -- Python
+ , [ pythonParams { tpName = "Python"
+ , tpBnfcOptions = ["--python"] }
+ ]
]
where
base = baseParameters
@@ -444,6 +448,14 @@ parameters = concat
, tpBnfcOptions = ["--ocaml"]
, tpRunTestProg = haskellRunTestProg
}
+ pythonParams = base
+ { tpBuild = do
+ return () -- nothing to make or compile
+        , tpRunTestProg = \ _lang args -> do
+ pyFile_ <- findFile "genTest.py"
+ cmd "python3.10" $ pyFile_ : args
+ }
-- | Helper function that runs bnfc with the context's options and an
-- option to generate 'tpMakefile'.
From 59181603b0fcfb50b5b0782b988e1fefb7558ecc Mon Sep 17 00:00:00 2001
From: AiStudent <8739546+AiStudent@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:57:22 +0200
Subject: [PATCH 2/8] Update user_guide.rst
---
docs/user_guide.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 8273f1d5..90e9ed97 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -318,7 +318,7 @@ Example usage: ::
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
-Optionally one may with ``-m``` also create a makefile that contains the target
+Optionally one may with ``-m`` also create a makefile that contains the target
"distclean" to remove the generated files.
Testing the frontend
From 1f9a9179c22b97aea9c003ca37908efa725faa86 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 15:46:45 +0200
Subject: [PATCH 3/8] changed python3.10 commands to python3
---
docs/user_guide.rst | 6 +++---
document/BNF_Converter_Python_Mode.html | 2 +-
testing/src/ParameterizedTests.hs | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 8273f1d5..534eb1d2 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -326,15 +326,15 @@ Testing the frontend
It's possible to pipe input, like::
- echo "(1 + 2) * 3" | python3.10 genTest.py
+ echo "(1 + 2) * 3" | python3 genTest.py
or::
- python3.10 genTest.py < file.txt
+ python3 genTest.py < file.txt
and it's possible to just use an argument::
- python3.10 genTest.py file.txt
+ python3 genTest.py file.txt
Caveats
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 429b5c44..8021b3e1 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -76,7 +76,7 @@ Testing the frontend
The following example uses a frontend that is generated from a C-like grammar.
- $ python3.10 genTest.py < hello.c
+ $ python3 genTest.py < hello.c
Generating LALR tables
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index 13d85e7d..8231c8eb 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -454,7 +454,7 @@ parameters = concat
        , tpRunTestProg = \ _lang args -> do
pyFile_ <- findFile "genTest.py"
- cmd "python3.10" $ pyFile_ : args
+ cmd "python3" $ pyFile_ : args
}
-- | Helper function that runs bnfc with the context's options and an
From 8ee25b37f1472cd9a1f17c5ed0a2987f101e7613 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 15:54:26 +0200
Subject: [PATCH 4/8] Cleanup
---
docs/user_guide.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 780cd4db..4e517da9 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -290,7 +290,7 @@ Python Backend
The BNF Converter's Python Backend generates a Python frontend, that uses
`PLY `_ (Python Lex Yacc), to parse
-input into an AST (abstract syntax tree).
+input into an abstract syntax tree.
Python 3.10 or higher is needed.
From c8d5ede6851a99e52897854c3fe98177ff22ae54 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 5 Nov 2024 13:45:29 +0100
Subject: [PATCH 5/8] Updated to Lark
---
source/BNFC.cabal | 1 -
source/src/BNFC/Backend/Python.hs | 41 +-
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 563 +++++++++++---------
source/src/BNFC/Backend/Python/CFtoPyLex.hs | 189 -------
4 files changed, 319 insertions(+), 475 deletions(-)
delete mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index c232b401..9280a8f3 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -283,7 +283,6 @@ library
-- Python backend
BNFC.Backend.Python
BNFC.Backend.Python.CFtoPyAbs
- BNFC.Backend.Python.CFtoPyLex
BNFC.Backend.Python.CFtoPyPrettyPrinter
BNFC.Backend.Python.RegToFlex
BNFC.Backend.Python.PyHelpers
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index e0a3da35..13568e38 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -14,23 +14,20 @@ import BNFC.CF (CF, firstEntry)
import BNFC.Options (SharedOptions, optMake, lang)
import BNFC.Backend.Base (MkFiles, mkfile)
import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs)
-import BNFC.Backend.Python.CFtoPyLex (cf2PyLex)
import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
-
-import BNFC.PrettyPrint -- For Doc
+import BNFC.PrettyPrint
import qualified BNFC.Backend.Common.Makefile as Makefile
+
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
let pkgName = "bnfcPyGen" ++ name
- let (lexerDefs, tokensPly) = cf2PyLex cf
- let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly
+ let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
- mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs
mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
@@ -56,11 +53,9 @@ makefile pkgName optMakefileName basename = vcat
, Makefile.mkRule "vclean" []
[ "rm -f " ++ unwords
[
- pkgName ++ "/LexTokens.py",
pkgName ++ "/ParsingDefs.py",
pkgName ++ "/Absyn.py",
pkgName ++ "/PrettyPrinter.py",
- pkgName ++ "/LexTokens.py.bak",
pkgName ++ "/ParsingDefs.py.bak",
pkgName ++ "/Absyn.py.bak",
pkgName ++ "/PrettyPrinter.py.bak",
@@ -74,7 +69,6 @@ makefile pkgName optMakefileName basename = vcat
"rmdir " ++ pkgName,
"rm -f __pycache__/*.pyc",
"rm -fd __pycache__",
- "rm -f parser.out parsetab.py",
"rm -f " ++ makefileName,
"rm -f " ++ makefileName ++ ".bak"
]
@@ -94,17 +88,14 @@ comment x = "# " ++ x
pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
[
- "from ply.lex import lex",
- "from ply.yacc import yacc",
"import sys",
- "from " ++ pkgName ++ ".LexTokens import *",
"from " ++ pkgName ++ ".ParsingDefs import *",
"from " ++ pkgName ++ ".PrettyPrinter import *",
"",
"",
"# Suggested input options:",
- "# python3.10 genTest.py < sourcefile",
- "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).",
+ "# python3 genTest.py < sourcefile",
+ "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).",
"inputFile = None",
"if len(sys.argv) > 1:",
"\tf = open(sys.argv[1], 'r')",
@@ -117,22 +108,14 @@ pyTest pkgName cf = unlines
"\tfor line in sys.stdin:",
"\t\tinp += line",
"",
- "",
- "# Customizable error handling for the parsing",
- "def p_error(p: lex.LexToken):",
- "\tif p is None:",
- "\t\tprint('No rule could reduce the tokenized input')",
- "\telse:",
- "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')",
- "\t\tp.lexer.syntaxError = True",
- "",
+ "def onError(e):",
+ " print(e)",
+ " print('Parse failed')",
+ " quit(1)",
"",
"# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
- "lexer = lex.lex()",
- "parser = yacc(start=" ++ defaultEntry ++ ")",
- "lexer.syntaxError = False",
- "ast = parser.parse(inp, lexer=lexer)",
- "if ast and not lexer.syntaxError:",
+ "ast = parser.parse(inp, on_error=onError)",
+ "if ast: # and not lexer.syntaxError:",
"\tprint('Parse Successful!\\n')",
"\tprint('[Abstract Syntax]')",
"\tprint(printAST(ast))",
@@ -144,6 +127,4 @@ pyTest pkgName cf = unlines
"\tprint('Parse failed')",
"\tquit(1)"
]
- where
- defaultEntry = (addCitationSigns . translateToList . show . firstEntry) cf
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index a4712fb0..adad97f3 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -9,45 +9,47 @@ module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
import Data.List ( nub, intercalate )
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
-import Data.Char (toLower)
+import Data.Char (toLower, toUpper)
import qualified Data.List.NonEmpty as List1
+
-- | The result is ParsingDefs.py & Absyn.py
cf2PyAbs
:: String
-> CF -- ^ Grammar.
- -> [(String, String)] -- Tokens to unicode mapping
-> (String, String) -- ParsingDefs.py, Absyn.py.
-cf2PyAbs pkgName cf tokensPly = ( unlines
- [ "from " ++ pkgName ++ ".Absyn import *"
- , "\n\n" ++ createCommonEntrypointDef cf
- , "\n\n" ++ (unlines parsingDefs)
- , if length definesParsingDefs > 0
- then "\n\n# Parsing rules from defines"
- else ""
- , "\n\n" ++ unlines definesParsingDefs
+cf2PyAbs pkgName cf = ( unlines
+ [ "from lark import Lark, Transformer, v_args"
+ , "from dataclasses import dataclass"
+ , "from " ++ pkgName ++ ".Absyn import *"
+ , ""
+ , createGrammar cf
+ , createTransformer cf
+ , ""
+ , "# Create Lark parser with the given grammar"
+ , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++
+ "transformer=TreeTransformer())"
+ , ""
+ ]
+ , unlines
+ ["from typing import List as _List"
+ ,"# Value categories (no coercsions):"
+ , unlines valueCatsClasses
+ , ""
+ , placeholderVariableClass
+ , ""
+ ,"# Rules:"
+ ,"from dataclasses import dataclass, field"
+ ,"\n" ++ (unlines dataClasses)
]
- , "from typing import List as _List" ++
- "\n\n# Value categories (no coercsions):" ++
- "\n\n" ++ unlines valueCatsClasses ++
- "\n\n" ++ placeholderVariableClass ++
- "\n\n# Rules:" ++
- "\n" ++ "from dataclasses import dataclass, field" ++
- "\n\n" ++ (unlines dataClasses)
)
where
rules = cfgRules cf
-
- -- To create ParsingDefs.py
- parsingDefs :: [String]
- parsingDefs = map (ruleToParsingDef cf tokensPly)
- [r | r <- rules, isParsable r, not (isDefinedRule r)]
- definesParsingDefs = makeDefineParsingDefs cf tokensPly
-
-- To create Absyn.py
dataClasses :: [String]
dataClasses = map makePythonClass
@@ -69,10 +71,289 @@ cf2PyAbs pkgName cf tokensPly = ( unlines
, "Integer(int)"
, "Double(float)"
]
-
valueCatsClasses = map createValueCatClass valueCatNames
-
+
+-- Creates a grammar for Lark. Note that it is a raw string (r"...").
+createGrammar :: CF -> String
+createGrammar cf = unlines
+ [ "grammar = r\"\"\""
+ , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf)
+ , ""
+ , unlines orClauses
+ , larkLiterals cf
+ , unlines singleComments
+ , unlines multiComments
+ , " %import common.WS"
+ , " %ignore WS"
+ , "\"\"\""
+ ]
+ where
+ aCats = reallyAllCats cf
+ rs = cfgRules cf
+
+ enumeratedRules :: [(Int, Rul RFun)]
+ enumeratedRules = enumerateAllDefinedRules rs 1 []
+ orClauses = map (createOrClause cf enumeratedRules) aCats
+
+ (multiMatchers, singleMatchers) = comments cf
+ singleComments = map createLineCommentMatcher singleMatchers
+ multiComments = map createMultiLineCommentMatcher multiMatchers
+
+
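As a hedged illustration of createGrammar's output: for a small grammar with rules like ``EAdd. Exp ::= Exp "+" Exp1`` and ``EInt. Exp1 ::= Integer``, the generated string would have roughly this shape (names follow the conventions above, but the exact output may differ)::

    # Hypothetical shape of the generated Lark grammar; not verbatim output.
    grammar = r"""
        ?start: exp

        ?exp: exp "+" exp1 -> r_eadd
            | exp1

        ?exp1: INTEGER -> r_eint

        INTEGER.2: /\d+/

        %import common.WS
        %ignore WS
    """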
+-- Enumerates all rules, numbering only the defined ones, to prevent name clashes.
+enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
+ -> [(Int, Rul RFun)]
+enumerateAllDefinedRules [] _ irs = irs
+enumerateAllDefinedRules (r:rs) n irs
+ | isDefinedRule r = enumerateAllDefinedRules rs (n+1) (irs ++ [(n, r)])
+ | otherwise = enumerateAllDefinedRules rs n (irs ++ [(0, r)])
+
+
+-- Creates an or clause with all rules for a given category.
+createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String
+createOrClause cf irs c = unlines
+ [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
+ intercalate "\n | "
+ (map createProdAndNameForRule catsIrs)
+ ]
+ where
+ catsIrs = [(n, removeWhiteSpaceSeparators r) | (n, r) <- irs,
+ valCat r == c, isParsable r]
+
+
+-- Creates an entry for an or clause.
+createProdAndNameForRule :: (Int, Rul RFun) -> String
+createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
+ if (not (isCoercion r)) then " -> " ++ map toLower name else ""
+ where
+ name
+ | isNilFun r = "nil" ++ (identCat . valCat) r
+ | isOneFun r = "one" ++ (identCat . valCat) r
+ | isConsFun r = "cons" ++ (identCat . valCat) r
+ | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
+ | otherwise = "r_" ++ funName r
+
+
+-- Creates the literals for a grammar for Lark.
+larkLiterals :: CF -> String
+larkLiterals cf = unlines $ concat
+ [
+ ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""]
+ , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLiteral "Integer.2" "\\d+"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"]
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ ]
+
+ createLiteral :: String -> String -> String
+ createLiteral name regex =
+ " " ++ map toUpper name ++ ": /" ++ regex ++ "/"
+
+
+-- Creates the class transformer, where each member method tells Lark how
+-- to transform some parsed node in the tree.
+createTransformer :: CF -> String
+createTransformer cf = unlines
+ [ "#transformer"
+ , "class TreeTransformer(Transformer):"
+ , unlines (map createRuleTransform rs)
+ , unlines (map (makeDefineTransform cf) enumeratedRDs)
+ , unlines (map createListTransform listRules)
+ , createTokenTransformers cf
+ ]
+ where
+ enumeratedRules :: [(Int, Rul RFun)]
+ enumeratedRules = enumerateAllDefinedRules (cfgRules cf) 1 []
+
+ rs = [r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isNilCons r)
+ , not (isDefinedRule r)]
+ listRules = [r | r <- cfgRules cf, isNilCons r]
+
+ enumeratedRDs = [(n, r, d) | (n, r) <- enumeratedRules, d <- definitions cf
+ , not (isCoercion r)
+ , not (isNilCons r)
+ , isDefinedRule r
+ , nameCorresponds ((wpThing . defName) d) (funName r)]
+
+
+-- Creates a transform for a rule
+createRuleTransform :: Rul RFun -> String
+createRuleTransform r = unlines
+ [ " @v_args(inline=True)"
+ , " def r_" ++ map toLower (funName r) ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
+ ]
+ where
+ sentForm = rhsRule r
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+
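For a rule ``EAdd. Exp ::= Exp "+" Exp1``, the method emitted by createRuleTransform looks roughly as follows; the argument names come from numVars and are assumed here::

    # Hypothetical excerpt of the generated TreeTransformer:
    from lark import Transformer, v_args

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def r_eadd(self, exp_1, exp_2):
            return EAdd(exp_1, exp_2)  # EAdd is defined in the generated Absyn.py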
+
+-- Creates a transform for a list rule.
+createListTransform :: Rul RFun -> String
+createListTransform r = unlines
+ [ " @v_args(inline=True)"
+ , " def " ++ map toLower name ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ args
+ ]
+ where
+ name
+ | isNilFun r = "nil" ++ (identCat . valCat) r
+ | isOneFun r = "one" ++ (identCat . valCat) r
+ | isConsFun r = "cons" ++ (identCat . valCat) r
+ | otherwise = funName r
+
+ sentForm = rhsRule r
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+
+ args :: String
+ | isNilFun r = "[]"
+ | isOneFun r = "[" ++ head enumeratedVars ++ "]"
+ | isConsFun r = "[" ++ head enumeratedVars ++ "] + " ++
+ last enumeratedVars
+ | otherwise = error "Should be a list function"
+
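Similarly, for ``separator Exp ","`` the emitted list transforms would be roughly (method and variable names assumed)::

    # Hypothetical list methods on the generated TreeTransformer:
    @v_args(inline=True)
    def nillistexp(self):
        return []

    @v_args(inline=True)
    def conslistexp(self, exp, listexp):
        return [exp] + listexp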
+
+-- Creates the transformer functions for the tokens.
+createTokenTransformers :: CF -> String
+createTokenTransformers cf = unlines $ concat
+ [
+ ifC catString [createTokenTransform "String"]
+ , ifC catChar [createTokenTransform "Char"]
+ , ifC catDouble [createTokenTransform "Double"]
+ , ifC catInteger [createTokenTransform "Integer"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createTokenTransform "Ident"]
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createTokenTransform name | (name, exp) <- tokenPragmas cf
+ ]
+
+
+-- Creates a transform for a token.
+createTokenTransform :: String -> String
+createTokenTransform name = unlines
+ [ " @v_args(inline=True)"
+ , " def " ++ map toUpper name ++ "(self, token):"
+ , " return " ++ name ++ "(token.value)"
+ ]
+
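For the built-in Integer token, createTokenTransform expands to::

    # Method on the generated TreeTransformer, per the code above:
    @v_args(inline=True)
    def INTEGER(self, token):
        return Integer(token.value)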
+
+-- | Produces the production in the docstring for the parsing definitions.
+prodToDocStr ::[Either Cat String] -> String
+prodToDocStr [] = ""
+prodToDocStr (ec:[]) = ecsToDocStr ec
+prodToDocStr (ec:ecs) =
+ ecsToDocStr ec ++ " " ++ prodToDocStr ecs
+
+
+-- Converts a single element in the production.
+ecsToDocStr :: Either Cat String -> String
+ecsToDocStr (Left (TokenCat t)) = map toUpper t
+ecsToDocStr (Left c) = map toLower (translateToList (show c))
+ecsToDocStr (Right strOp) = "\"" ++ concat (map escapeBackslash strOp) ++ "\""
+
+
+-- | For single-line comments
+createLineCommentMatcher :: String -> String
+createLineCommentMatcher r = unlines
+ [ " C" ++ toOrd r ++ ": /" ++ concat (map escapeChar r) ++ "[^\\n]*/"
+ , " %ignore C" ++ toOrd r
+ ]
+
+
+-- | For multi-line comments
+createMultiLineCommentMatcher :: (String, String) -> String
+createMultiLineCommentMatcher (s, e) = unlines
+ [ " C" ++ toOrd (s ++ e) ++ ": /" ++ escaped s ++ "([\\s\\S]*?)" ++
+ escaped e ++ "/"
+ , " %ignore C" ++ toOrd (s ++ e)
+ ]
+ where
+ escaped s = concat $ map escapeChar s
+
+
+-- Since the grammar is a raw string, r""" ... """, it seems we can't
+-- escape everything in strOp the way regToFlex does; only backslashes.
+escapeBackslash :: Char -> String
+escapeBackslash '\\' = "\\\\"
+escapeBackslash c = [c]
+
+
+-- | To compare names for defines. The first letter needs to be lowered, so
+-- "while" == "While".
+nameCorresponds :: String -> String -> Bool
+nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
+nameCorresponds _ _ = error "Names can't be empty"
+
+
+-- Creates a transformer for a rule with its corresponding define.
+makeDefineTransform ::
+ CF -> (Int, Rul RFun, Define) -> String
+makeDefineTransform cf (n, defRule, defi) = unlines
+ [ " @v_args(inline=True)"
+ , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ expToDef env2 (defBody defi)
+ , ""
+ ]
+ where
+ name = (wpThing . defName) defi
+ sentForm = rhsRule defRule
+ args = map fst (defArgs defi)
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ env2 = zip args enumeratedVars
+
+
+-- | Converts the production of a define, called an expression, to a
+-- production for the parsing definition.
+expToDef :: [(String, String)] -> Exp -> String
+expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
+expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
+ expToDef env recList
+expToDef _ (App "[]" _ _) = "[]"
+expToDef env (App fName _ exps) =
+ fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
+expToDef env (Var s) = case lookup s env of
+ Just p -> p
+ Nothing -> error "Missing variable in define enviroment"
+expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
+expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
+expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
+expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+
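Putting makeDefineTransform and expToDef together, a define such as ``twice x = EAdd x x`` would yield roughly this method; the numbering and the variable name are assumptions::

    # Hypothetical transform for "define twice x = EAdd x x":
    @v_args(inline=True)
    def d1_r_twice(self, exp):
        return EAdd(exp, exp)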
+
+-- A placeholder variable to store additional information, e.g. type
+-- annotations.
placeholderVariableClass :: String
placeholderVariableClass = unlines
[ "# Placeholder to add additional information to a node in the AST," ++
@@ -98,6 +379,7 @@ placeholderVariableClass = unlines
, " return str(self.__v.__class__)"
]
+
-- | Creates a parsing definition that points to all entrypoints.
createCommonEntrypointDef :: CF -> String
createCommonEntrypointDef cf = unlines
@@ -126,37 +408,6 @@ createValueCatClass :: String -> String
createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
--- | Creates a parsing definition, by checking what type of rule it is and
--- calling the corresponding make function.
-ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String
-ruleToParsingDef cf tokensPly rule
- | isCoercion funcRStr =
- makeParseCoercion cf tokensPly funcCat (fName, sentForm)
- | isNilFun funcRStr =
- makeParseNil tokensPly funcCat (fNameTranslated, sentForm)
- | isOneFun funcRStr =
- makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm)
- | isConsFun funcRStr =
- makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm)
- | isDefinedRule rule =
- error "Should not generate define rules in this step"
- | otherwise =
- makeParseFunc cf tokensPly funcCat (fName, sentForm)
- where
- funcRStr = funRule rule :: RString
- fName = wpThing funcRStr :: String
-
- funcCat = valCat rule :: Cat
- catStr = show (valCat rule) :: String
-
- fNameTranslated :: String
- fNameTranslated
- | isNilFun funcRStr = catStr
- | otherwise = fName
-
- sentForm = rhsRule rule :: [Either Cat String]
-
-
-- | Make a Python class from a rule's name and production.
makePythonClass :: Rul RFun -> String
makePythonClass rule =
@@ -176,207 +427,9 @@ makePythonClass rule =
["_ann_type: _AnnType = field(default_factory=_AnnType)"])
-
-- | Creates the corresponding type hinting for some member variable.
strCatToPyTyping :: String -> String
strCatToPyTyping s =
if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
--- | It could be this is only guarding against list categories.
-literalsToPytypeMaybe :: CF -> String -> Maybe String
-literalsToPytypeMaybe cf s = case s of
- "Integer" -> Just "Integer"
- "Double" -> Just "Double"
- "Char" -> Just "Char"
- "String" -> Just "String"
- "Ident" -> Just "Ident"
- _ -> if s `elem` (tokenNames cf) then Just s else Nothing
-
-
--- | The following makeParse functions create their corresponding parsing
--- definitions for some rule.
-makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines
- [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")"
-
-
-makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ strP ++ "\n"
- ]
- where
- strP = head (getLeftIndexes cf 1 sentForm)
- sourceCat = (head . lefts) sentForm
-
-
-makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String
-makeParseNil tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = []\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
-
-
-makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String
-makeParseOne cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
- rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
-
-
-makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseCons cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\"" ++ "\n"
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
- rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
-
-
--- | Produces a list of the elements in the code production, where the indices
--- match the argument categories.
-getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String]
- -> [String]
-getLeftIndexesLists _ _ _ [] = []
-getLeftIndexesLists tokensPly cf n (Left c:ecs)
- | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs)
- | otherwise = ["[" ++ typedPTerm ++ "]"] ++
- (getLeftIndexesLists tokensPly cf (n+1) ecs)
- where
- pTerm = "p[" ++ (show n) ++ "]"
- typedPTerm = case literalsToPytypeMaybe cf (show c) of
- Just s -> s ++ "(" ++ pTerm ++ ")"
- Nothing -> pTerm
-getLeftIndexesLists tokensPly cf n (Right strOp:ecs)
- | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs
- | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs
-
-
--- | In case the deliminator is "" or is not defined for the lexer, like
--- ignored characters.
-separatorIsEmpty :: [(String, String)] -> String -> Bool
-separatorIsEmpty tokensPly strOp
- | length strOp > 0 = case lookup strOp tokensPly of
- Just _ -> False
- Nothing -> True
- | otherwise = True
-
-
--- | Produces a list of the elements in the code production, where the indices
--- match the argument categories.
-getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String]
-getLeftIndexes _ _ [] = []
-getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++
- (getLeftIndexes cf (n+1) ecs)
- where
- pTerm = "p[" ++ (show n) ++ "]"
- typedPTerm = case literalsToPytypeMaybe cf (show c) of
- Just s -> s ++ "(" ++ pTerm ++ ")"
- Nothing -> pTerm
-getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs
-
-
--- | Produces the production in the docstring for the parsing definitions.
-prodToDocStr :: [(String, String)] -> [Either Cat String] -> String
-prodToDocStr _ [] = ""
-prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec
-prodToDocStr tokensPly (ec:ecs) =
- ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs
-
-
--- Converts a single element in the production.
-ecsToDocStr :: [(String, String)] -> Either Cat String -> String
-ecsToDocStr _ (Left c) = translateToList $ show c
-ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of
- (Just s) -> s
- Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes
-
-
--- | Creating the parsing definitions for the defines.
-makeDefineParsingDefs :: CF -> [(String, String)] -> [String]
-makeDefineParsingDefs cf tokensPly = defFuncsPy
- where
- rules = cfgRules cf
-
- definedRules :: [Rul RFun]
- definedRules = [r | r <- rules, isDefinedRule r]
-
- pairs :: [(Rul RFun, Define)]
- pairs = [(dr, d) | dr <- definedRules, d <- definitions cf,
- nameCorresponds ((wpThing . defName) d) (funName dr)]
-
- -- Adds a number to the name to make each define separate.
- numberedPairs = zip [1..] pairs
- defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs
-
-
--- | To compare names for defines. The first letter needs to be lowered, so
--- "while" == "While".
-nameCorresponds :: String -> String -> Bool
-nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
-nameCorresponds _ _ = error "Names can't be empty"
-
-
--- | Creates a define parsing definition.
-makeDefineParsingDef ::
- CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String
-makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines
- [ "def p_D" ++ (show n) ++ name ++ "(p):"
- , "\t\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t\"\"\""
- , "\t# " ++ show env
- , "\tp[0] = " ++ expToDef env (defBody defi)
- , ""
- ]
- where
- name = (wpThing . defName) defi
- translatedCat = translateToList $ (catToStr . valCat) defRule
- sentForm = rhsRule defRule
- indexes = getLeftIndexes cf 1 sentForm
- args = map fst (defArgs defi)
- env = zip args indexes
-
-
--- | Converts the production of a define, called an expression, to a
--- production for the parsing definition.
-expToDef :: [(String, String)] -> Exp -> String
-expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
-expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
- expToDef env recList
-expToDef _ (App "[]" _ _) = "[]"
-expToDef env (App fName _ exps) =
- fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
-expToDef env (Var s) = case lookup s env of
- Just p -> p
- Nothing -> error "Missing variable in define enviroment"
-expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
-expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
-expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
-expToDef _ (LitString s) = "String('" ++ show s ++ "')"
-
-
diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
deleted file mode 100644
index fd1a532a..00000000
--- a/source/src/BNFC/Backend/Python/CFtoPyLex.hs
+++ /dev/null
@@ -1,189 +0,0 @@
-
-{-
- BNF Converter: Python lexer generator
- Copyright (C) 2024 Author: Bjorn Werner
--}
-
-module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where
-
-import BNFC.CF
-
-import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
-import BNFC.Backend.Python.PyHelpers
-
-
--- | The entrypoint, returns LexTokens.py and the unicode mapping.
-cf2PyLex :: CF -> (String, [(String, String)])
-cf2PyLex cf = (, tokensPly) $ unlines
- [ "import ply.lex as lex\n"
- , ""
- , createReservedMap reservedWordsEnv
- , "# PLY tokens:\n" ++ plyTokens ++ "\n"
- , "# PLY tokens with RegEx:"
- , unlines plyTokensRegEx
- , "# Literals:"
- , plyLiterals cf
- , "# Comments:"
- , unlines singleComments
- , unlines multiComments
- , footer
- ]
- where
- -- The reserved keywords and the symbols are zipped with a
- -- unicode representation, which are needed for the parsing.
-
- -- Reserved keywords -> [("int", "R_...")]
- reservedWordsVar :: [String]
- reservedWordsVar = reservedWords cf
-
- reservedWordsEnv :: [(String, String)]
- reservedWordsEnv =
- zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar)
-
- -- Symbols -> [("+", "S_43")]
- literalsVar :: [String]
- literalsVar = literals cf
-
- strOps :: [String]
- strOps = map fst (cfTokens cf)
-
- strOpsFiltered = filterOut strOps reservedWordsVar
- strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered
-
- strOpsAndSymbols :: [(String, String)]
- strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols
-
- presentSymbols :: [String]
- presentSymbols =
- map addCitationSigns (strOpsFilteredSymbols ++ literalsVar)
-
- -- Defining the variables for the lexer.
- plyTokens =
- "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")"
- plyTokensRegEx = map createRegEx strOpsAndSymbols
-
- tokensPly :: [(String, String)]
- tokensPly = reservedWordsEnv ++ strOpsAndSymbols
-
- -- Comments
- (multiMatchers, singleMatchers) = comments cf
- singleComments = map createLineCommentMatcher singleMatchers
- multiComments = map createMultiLineCommentMatcher multiMatchers
-
-
--- | Creates tokens for the lexer, such as "t_S_43 = r'\+'".
-createRegEx :: (String, String) -> String
-createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'"
-
-
--- | For single-line comments
-createLineCommentMatcher :: String -> String
-createLineCommentMatcher r = unlines
- [ "def t_C" ++ (toOrd r) ++ "(t):"
- , "\tr'" ++ concat (map escapeChar r) ++ ".*'"
- , "\tpass"
- ]
-
-
--- | For multi-line comments
-createMultiLineCommentMatcher :: (String, String) -> String
-createMultiLineCommentMatcher (s, e) = unlines
- [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):"
- , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'"
- , "\tpass"
- ]
- where
- escaped s = concat $ map escapeChar s
-
-
--- | The reserved_map contains mappings for reserved keywords,
--- such as 'int' : 'R_105_110_116'.
-createReservedMap :: [(String, String)] -> String
-createReservedMap xs = unlines
- [ "reserved_map = {"
- , unlines rows
- , "}"
- , ""
- , "reserved = ("
- , unlines rowsSnd
- , ")"
- ]
- where
- rows :: [String]
- rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs]
-
- rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs]
-
-
--- | Creates lexer definitions for the lexer which are interpreted using
--- the inspect module to retrieve useful information, for example:
--- def t_String(t):
--- r'"[^"]+"'
--- t.type = reserved_map.get(t.value, ’String’)
--- return t
-plyLiterals :: CF -> String
-plyLiterals cf = unlines $ concat
- [
- ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""]
- , ifC catChar
- [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLexFunc "Integer" "\\d+"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
- , userDefTokens
- , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"]
- -- If there is no Ident present, we need a lexer definition for reserved
- -- words:
- , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0
- then [createLexFunc "" "[A-Za-z]\\w*"]
- else []
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
- ]
-
-
--- | Creates a Lexing definition for a Literal
--- If no Literal name is used, this is just a reserved_map lookup.
-createLexFunc :: String -> String -> String
-createLexFunc name regex = unlines
- [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):"
- , "\tr'" ++ regex ++ "'"
- , if name /= ""
- then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')"
- else "\tt.type = reserved_map.get(t.value)"
- , "\treturn t"
- ]
-
-
--- | Adds lexer definitions to ignore whitespaces, and a testing block
--- which attempts tokenize some input, like: python3 LexTokens.py < input
-footer :: String
-footer = unlines
- [ "# Ignored characters:"
- , "t_ignore = ' \\t'"
- , ""
- , "# Ignored token with an action associated with it:"
- , "def t_ignore_newline(t):"
- , "\tr'\\n+'"
- , "\tt.lexer.lineno += t.value.count('\\n')"
- , ""
- , "# Error handler for illegal characters:"
- , "def t_error(t):"
- , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))"
- , "\tquit()"
- , ""
- , "if __name__ == \"__main__\":"
- , "\tlexer = lex.lex()"
- , "\tlex.runmain(lexer)"
- ]
-
-
From 283dfd833168260c5512cd86f085583456a80904 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 5 Nov 2024 16:09:08 +0100
Subject: [PATCH 6/8] Retargeted to Lark
---
docs/user_guide.rst | 55 ++++++-------
document/BNF_Converter_Python_Mode.html | 68 ++++++-----------
source/src/BNFC/Backend/Python.hs | 85 +++++++++++----------
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 10 +--
4 files changed, 98 insertions(+), 120 deletions(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 4e517da9..c4e1e287 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -289,10 +289,9 @@ Python Backend
===============
The BNF Converter's Python Backend generates a Python frontend, that uses
-`PLY `_ (Python Lex Yacc), to parse
-input into an abstract syntax tree.
+Lark to parse input into an AST (abstract syntax tree).
-Python 3.10 or higher is needed.
+Lark and Python 3.10 or higher are needed.
Example usage: ::
@@ -307,10 +306,8 @@ Example usage: ::
- Description
* - bnfcPyGenCalc/Absyn.py
- Provides the classes for the abstract syntax.
- * - bnfcPyGenCalc/LexTokens.py
- - Provides PLY with the information needed to build the lexer.
* - bnfcPyGenCalc/ParserDefs.py
- - Provides PLY with the information needed to build the parser.
+ - Provides Lark with the information needed to build the lexer and parser.
* - bnfcPyGenCalc/PrettyPrinter.py
- Provides printing for both the AST and the linearized tree.
* - genTest.py
@@ -318,7 +315,7 @@ Example usage: ::
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
-Optionally one may with ``-m`` also create a makefile that contains the target
+Optionally, with ``-m``, one may also create a makefile that contains the target
"distclean" to remove the generated files.
Testing the frontend
@@ -340,34 +337,28 @@ and it's possible to just use an argument::
Caveats
.......
-Presentation of conflicts in a grammar:
-
- A symbol-to-unicode transformation is made for the terminals in the grammar,
- for example from "++" to "S_43_43". This however obfuscates PLYs generated
- information of the grammar in the "parser.out" file. Users are hence
- encouraged to use the Haskell backend to debug grammars and identify
- conflicts.
-
Several entrypoints:
+ The test file genTest.py uses only the first entrypoint by default. To
+ use all entrypoints, set the start parameter to "start_". If the
+ entrypoints cause reduce/reduce conflicts, a Lark GrammarError will be
+ raised.
- At the top of the ParserDefs.py file an additional rule is added, that has
- every defined entrypoint as a possible production. This may create warnings
- for conflicts, as it may introduce ambiguity. Therefore the added
- parsing rule is by default removed beneath the function, with the statement
- "del p__Start", and included if the user comments out the removal of
- "p__Start".
-
-Special cases for special characters:
+Results from the parameterized tests:
+ While the Python backend generates working frontends for the example
+ grammars, the regression tests report five "failures" and six "errors".
- Using non-special characters, instead of say parentheses when defining rules,
- may not yield the expected behaviour. Using the below rule, an expression
- such as "a1+2a" can not be parsed since the a's are classified as reserved
- keywords, like "int", instead of symbols like "+"::
+Skeleton code for using lists as entrypoints:
+ Matchers for list categories, such as [Exp], are not generated in the
+ skeleton code: in a grammar with several different list categories, a user
+ may otherwise try to pattern match lists without checking what type the
+ elements have. Users are instead encouraged to use non-list entrypoints.
- _. Exp1 ::= "a" Exp "a" ;
+Using multiple separators:
+ Using multiple separators for the same category, as below, generates
+ Python functions with overlapping names, causing runtime errors::
-Results from the parameterized tests:
+ separator Exp1 "," ;
+ separator Exp1 ";" ;
- While the Python backend generates working frontends for the example
- grammars, four "failures" and six "errors" among the regression
- tests are reported.
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 8021b3e1..4ffb46f6 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -27,15 +27,15 @@ By Björn Werner
2024
The BNF Converter's Python Backend generates a Python frontend, that uses
- PLY (Python Lex Yacc), to parse input into an AST (abstract syntax tree).
+ Lark to parse input into an AST (abstract syntax tree).
BNFC on Github:
https://github.com/BNFC/bnfc
- PLY homepage:
- https://www.dabeaz.com/ply/ply.html
+ Lark on Github:
+ https://github.com/lark-parser/lark
Python 3.10 or higher is needed.
@@ -51,14 +51,11 @@
Usage
Filename: | Description: |
-
- bnfcGenNAME/LexTokens.py | Provides PLY with the information needed to build the lexer. |
-
bnfcGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
- bnfcGenNAME/ParserDefs.py | Provides PLY with the information needed to build the parser. |
+ bnfcGenNAME/ParserDefs.py | Provides Lark with the information needed to build the lexer and parser. |
bnfcGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
@@ -79,7 +76,6 @@ Testing the frontend
$ python3 genTest.py < hello.c
- Generating LALR tables
Parse Successful!
[Abstract Syntax]
@@ -92,9 +88,6 @@
Testing the frontend
return 0;
}
-
- The LALR tables are cached in a file called "parsetab.py", and a description by PLY of the grammar is stored in a file called "parser.out".
-
The Abstract Syntax Tree
The AST is built up using instances of Python classes, using the dataclass decorator, such as:
@@ -150,34 +143,15 @@
Using the skeleton file
Known issues
-
- Presentation of conflicts in a grammar:
-
-
- A symbol-to-unicode transformation is made for the terminals in the grammar,
- for example from "++" to "S_43_43". This however obfuscates PLYs generated
- information of the grammar, inside the "parser.out" file. Users are hence
- encouraged to use say the Haskell backend to debug their
- grammars and identify conflicts.
-
-
- Several entrypoints:
-
-
- At the top of the ParserDefs.py file an additional rule is added, that has
- every defined entrypoint as a possible production. This may create warnings
- for conflicts if it introduces ambiguity, and warnings for unused rules if
- the "_Start" category is not used as the entrypoint. Therefore the added
- parsing rule is by default removed beneath the function, "del p__Start",
- and included if the user comments out the removal:
-
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
skeleton code as it may confuse users if the grammar uses several different
- list categories. Users are instead encouraged to use a non-list entrypoint.
+ list categories: a user may then try to pattern match lists without
+ checking what type the elements have. Users are instead encouraged to use
+ non-list entrypoints.
The improper way to iterate over lists, as the value category is unknown:
@@ -195,16 +169,6 @@
for exp in listexp_:
...
-
- Special cases for special characters
-
-
- Using non-special characters instead of say parentheses when defining rules, may not yield the expected
- behaviour. Using the below rule, an expression such as "a1+2a" can not be parsed.
-
-
- _. Exp1 ::= "a" Exp "a" ;
-
Using multiple separators
@@ -215,4 +179,20 @@
separator Exp1 "," ;
separator Exp1 ";" ;
-
\ No newline at end of file
+
+Several entrypoints:
+
+ The test file genTest.py uses only the first entrypoint by default. To
+ use all entrypoints, set the start parameter to "start_". If the
+ entrypoints cause reduce/reduce conflicts, a Lark GrammarError will be
+ raised.
+
+
+Results from the parameterized tests:
+
+
+ While the Python backend generates working frontends for the example
+ grammars, the regression tests report five "failures" and six "errors".
+
+
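To make the entrypoints caveat concrete, a minimal sketch; the package name bnfcPyGenCalc and the imported names are assumptions, not guaranteed output::

    # Sketch only: enable all entrypoints instead of just the first one.
    from lark import Lark
    from bnfcPyGenCalc.ParsingDefs import grammar, TreeTransformer

    parser = Lark(grammar, start='start_', parser='lalr', lexer='basic',
                  transformer=TreeTransformer())
    ast = parser.parse("(1 + 2) * 3")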
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index 13568e38..87ce07bb 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -18,6 +18,7 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
import BNFC.PrettyPrint
+import Data.Char (toLower)
import qualified BNFC.Backend.Common.Makefile as Makefile
@@ -87,44 +88,50 @@ comment x = "# " ++ x
-- Produces the content for the testing file, genTest.py.
pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
- [
- "import sys",
- "from " ++ pkgName ++ ".ParsingDefs import *",
- "from " ++ pkgName ++ ".PrettyPrinter import *",
- "",
- "",
- "# Suggested input options:",
- "# python3 genTest.py < sourcefile",
- "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).",
- "inputFile = None",
- "if len(sys.argv) > 1:",
- "\tf = open(sys.argv[1], 'r')",
- "\tinp = f.read()",
- "\tf.close()",
- "\tif len(sys.argv) > 2:",
- "\t\tinputFile = sys.argv[2]",
- "else:",
- "\tinp = ''",
- "\tfor line in sys.stdin:",
- "\t\tinp += line",
- "",
- "def onError(e):",
- " print(e)",
- " print('Parse failed')",
- " quit(1)",
- "",
- "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
- "ast = parser.parse(inp, on_error=onError)",
- "if ast: # and not lexer.syntaxError:",
- "\tprint('Parse Successful!\\n')",
- "\tprint('[Abstract Syntax]')",
- "\tprint(printAST(ast))",
- "\tprint('\\n[Linearized Tree]')",
- "\tlinTree = lin(ast)",
- "\tprint(renderC(linTree))",
- "\tprint()",
- "else:",
- "\tprint('Parse failed')",
- "\tquit(1)"
+ [ "import sys"
+ , "from " ++ pkgName ++ ".ParsingDefs import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , ""
+ , "# Suggested input options:"
+ , "# python3 genTest.py < sourcefile"
+ , "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)."
+ , "inputFile = None"
+ , "if len(sys.argv) > 1:"
+ , "\tf = open(sys.argv[1], 'r')"
+ , "\tinp = f.read()"
+ , "\tf.close()"
+ , "\tif len(sys.argv) > 2:"
+ , "\t\tinputFile = sys.argv[2]"
+ , "else:"
+ , "\tinp = ''"
+ , "\tfor line in sys.stdin:"
+ , "\t\tinp += line"
+ , ""
+ , "def onError(e):"
+ , " print(e)"
+ , " print('Parse failed')"
+ , " quit(1)"
+ , ""
+ , "# Creates the Lark parser with the given grammar. By default to the first"
+ , "# entrypoint. Other entrypoints exist in ParsingDefs.py."
+ , "parser = Lark(grammar, start='" ++ defaultEntrypoint ++ "', parser='lalr', lexer='basic', transformer=TreeTransformer())"
+ , ""
+ , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
+ , "ast = parser.parse(inp, on_error=onError)"
+ , "if ast: # and not lexer.syntaxError:"
+ , "\tprint('Parse Successful!\\n')"
+ , "\tprint('[Abstract Syntax]')"
+ , "\tprint(printAST(ast))"
+ , "\tprint('\\n[Linearized Tree]')"
+ , "\tlinTree = lin(ast)"
+ , "\tprint(renderC(linTree))"
+ , "\tprint()"
+ , "else:"
+ , "\tprint('Parse failed')"
+ , "\tquit(1)"
]
+ where
+ defaultEntrypoint = map toLower
+ ((translateToList . show . firstEntry) cf)
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index adad97f3..3daefee6 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -30,10 +30,6 @@ cf2PyAbs pkgName cf = ( unlines
, createGrammar cf
, createTransformer cf
, ""
- , "# Create Lark parser with the given grammar"
- , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++
- "transformer=TreeTransformer())"
- , ""
]
, unlines
["from typing import List as _List"
@@ -78,7 +74,7 @@ cf2PyAbs pkgName cf = ( unlines
createGrammar :: CF -> String
createGrammar cf = unlines
[ "grammar = r\"\"\""
- , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf)
+ , " ?start_: " ++ entryOrClause
, ""
, unlines orClauses
, larkLiterals cf
@@ -100,6 +96,10 @@ createGrammar cf = unlines
singleComments = map createLineCommentMatcher singleMatchers
multiComments = map createMultiLineCommentMatcher multiMatchers
+ strListEntryPoints = map ((map toLower) . translateToList . show)
+ ((List1.toList . allEntryPoints) cf)
+ entryOrClause = intercalate "\n | " strListEntryPoints
+
-- Enumerates all (only defined relevant) rules to prevent naming overlap.
enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
From f49fa38aa4a8a3d4f544fd8d12c6d839567d9612 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Mon, 11 Nov 2024 02:18:44 +0100
Subject: [PATCH 7/8] 0 failures and 3 errors left, due to an example requiring
 1000+ recursion depth and to lexer priorities not working as expected
---
source/src/BNFC/Backend/Python.hs | 50 +++----
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 133 ++++++++----------
.../Backend/Python/CFtoPyPrettyPrinter.hs | 43 +++---
source/src/BNFC/Backend/Python/CFtoPySkele.hs | 55 ++++----
source/src/BNFC/Backend/Python/PyHelpers.hs | 56 ++++++++
5 files changed, 188 insertions(+), 149 deletions(-)
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index 87ce07bb..e0dc012d 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -18,14 +18,14 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
import BNFC.PrettyPrint
-import Data.Char (toLower)
+import Data.Char (toLower, isLetter)
import qualified BNFC.Backend.Common.Makefile as Makefile
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
- let pkgName = "bnfcPyGen" ++ name
+ let pkgName = "bnfcPyGen" ++ filter isLetter name
let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
@@ -47,7 +47,7 @@ makefile :: String -> Maybe String -> String -> Doc
makefile pkgName optMakefileName basename = vcat
[
Makefile.mkRule "all" []
- [ " " ]
+ [ "@echo \"Doing nothing: No compilation of the parser needed.\"" ]
, Makefile.mkRule "clean" []
[ "rm -f parser.out parsetab.py" ]
, Makefile.mkRule "distclean" [ "vclean" ] []
@@ -90,27 +90,27 @@ pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
[ "import sys"
, "from " ++ pkgName ++ ".ParsingDefs import *"
- , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import printAST, lin, renderC"
, ""
, "# Suggested input options:"
, "# python3 genTest.py < sourcefile"
, "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)."
, "inputFile = None"
, "if len(sys.argv) > 1:"
- , "\tf = open(sys.argv[1], 'r')"
- , "\tinp = f.read()"
- , "\tf.close()"
- , "\tif len(sys.argv) > 2:"
- , "\t\tinputFile = sys.argv[2]"
+ , " f = open(sys.argv[1], 'r')"
+ , " inp = f.read()"
+ , " f.close()"
+ , " if len(sys.argv) > 2:"
+ , " inputFile = sys.argv[2]"
, "else:"
- , "\tinp = ''"
- , "\tfor line in sys.stdin:"
- , "\t\tinp += line"
+ , " inp = ''"
+ , " for line in sys.stdin:"
+ , " inp += line"
, ""
, "def onError(e):"
- , " print(e)"
- , " print('Parse failed')"
- , " quit(1)"
+ , " print(e)"
+ , " print('Parse failed')"
+ , " quit(1)"
, ""
, "# Creates the Lark parser with the given grammar. By default to the first"
, "# entrypoint. Other entrypoints exist in ParsingDefs.py."
@@ -118,17 +118,17 @@ pyTest pkgName cf = unlines
, ""
, "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
, "ast = parser.parse(inp, on_error=onError)"
- , "if ast: # and not lexer.syntaxError:"
- , "\tprint('Parse Successful!\\n')"
- , "\tprint('[Abstract Syntax]')"
- , "\tprint(printAST(ast))"
- , "\tprint('\\n[Linearized Tree]')"
- , "\tlinTree = lin(ast)"
- , "\tprint(renderC(linTree))"
- , "\tprint()"
+ , "if ast:"
+ , " print('Parse Successful!\\n')"
+ , " print('[Abstract Syntax]')"
+ , " print(printAST(ast))"
+ , " print('\\n[Linearized Tree]')"
+ , " linTree = lin(ast)"
+ , " print(renderC(linTree))"
+ , " print()"
, "else:"
- , "\tprint('Parse failed')"
- , "\tquit(1)"
+ , " print('Parse failed')"
+ , " quit(1)"
]
where
defaultEntrypoint = map toLower
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index 3daefee6..dd346984 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -13,7 +13,7 @@ import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
-import Data.Char (toLower, toUpper)
+import Data.Char (toLower, toUpper, isLower)
import qualified Data.List.NonEmpty as List1
@@ -29,6 +29,7 @@ cf2PyAbs pkgName cf = ( unlines
, ""
, createGrammar cf
, createTransformer cf
+ , createDefineFunctions cf
, ""
]
, unlines
@@ -59,8 +60,8 @@ cf2PyAbs pkgName cf = ( unlines
-- Note: Custom tokens are set to inherit "str".
valueCatNames = nub $
- (map (show . normCat . valCat) rulesNoListConstructors) ++
- (map (++"(str)") (tokenNames cf)) ++
+ (map (unkw . show . normCat . valCat) rulesNoListConstructors) ++
+ (map ((++ "(str)") . unkw) (tokenNames cf)) ++
[ "String(str)"
, "Char(str)"
, "Ident(str)"
@@ -90,7 +91,7 @@ createGrammar cf = unlines
enumeratedRules :: [(Int, Rul RFun)]
enumeratedRules = enumerateAllDefinedRules rs 1 []
- orClauses = map (createOrClause cf enumeratedRules) aCats
+ orClauses = map (createOrClause enumeratedRules) aCats
(multiMatchers, singleMatchers) = comments cf
singleComments = map createLineCommentMatcher singleMatchers
@@ -111,8 +112,8 @@ enumerateAllDefinedRules (r:rs) n irs
-- Creates an or clause with all rules for a given category.
-createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String
-createOrClause cf irs c = unlines
+createOrClause :: [(Int, Rul RFun)] -> Cat -> String
+createOrClause irs c = unlines
[ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
intercalate "\n | "
(map createProdAndNameForRule catsIrs)
@@ -132,23 +133,25 @@ createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
| isOneFun r = "one" ++ (identCat . valCat) r
| isConsFun r = "cons" ++ (identCat . valCat) r
| isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
- | otherwise = "r_" ++ funName r
+ | otherwise = "r_" ++ map toLower (funName r) ++ toOrd (funName r)
--- Creates the literals for a grammar for Lark.
+-- Creates the literals for a grammar for Lark. Priority is set after the
+-- dot, as in "Name.PRIO". For literals with the same priority, it appears
+-- that Lark (with the basic lexer) prioritizes the longest regular
+-- expression, not the longest matched literal.
larkLiterals :: CF -> String
larkLiterals cf = unlines $ concat
[
- ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""]
- , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLiteral "Integer.2" "\\d+"]
+ ifC catString [createLiteral "String.1" "\"(\\\\.|[^\"])*\""]
+ , ifC catChar [createLiteral "Char.1" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLiteral "Double.1" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLiteral "Integer.1" "\\d+"]
-- Prolog requires user defined tokens to have priority over Ident; C
-- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
+ -- "CDouble" matching "3." in 3.14.
, userDefTokens
- , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"]
+ , ifC catIdent [createLiteral "Ident" "[A-Za-z_]\\w*"]
]
where
ifC :: TokenCat -> [String] -> [String]
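For reference, a minimal self-contained sketch of the Lark terminal-priority syntax discussed above; illustrative only, assuming a standard lark installation::

    from lark import Lark

    # DOUBLE.1 has priority 1 (default 0), so "3.14" is lexed as one DOUBLE
    # token; with equal priorities the basic lexer appears to prefer the
    # longest regular expression rather than the longest match.
    g = r"""
        ?start: DOUBLE | ID
        DOUBLE.1: /\d+\.\d+/
        ID: /[A-Za-z_]\w*/
        %import common.WS
        %ignore WS
    """
    print(Lark(g, parser='lalr', lexer='basic').parse("3.14"))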
@@ -156,7 +159,7 @@ larkLiterals cf = unlines $ concat
userDefTokens :: [String]
userDefTokens = [
- createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ createLiteral name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
]
createLiteral :: String -> String -> String
@@ -171,7 +174,7 @@ createTransformer cf = unlines
[ "#transformer"
, "class TreeTransformer(Transformer):"
, unlines (map createRuleTransform rs)
- , unlines (map (makeDefineTransform cf) enumeratedRDs)
+ , unlines (map makeDefineTransform enumeratedRDs)
, unlines (map createListTransform listRules)
, createTokenTransformers cf
]
@@ -196,14 +199,16 @@ createTransformer cf = unlines
createRuleTransform :: Rul RFun -> String
createRuleTransform r = unlines
[ " @v_args(inline=True)"
- , " def r_" ++ map toLower (funName r) ++ "(self" ++
+ , " def r_" ++ nameWithUnicode ++ "(self" ++
concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
+ , " return " ++ className ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
]
where
+ nameWithUnicode = map toLower (funName r) ++ toOrd (funName r)
+ className = unkw (funName r)
sentForm = rhsRule r
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
-- Creates a transform for a list rule.
@@ -223,7 +228,7 @@ createListTransform r = unlines
sentForm = rhsRule r
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
args :: String
| isNilFun r = "[]"
@@ -241,10 +246,6 @@ createTokenTransformers cf = unlines $ concat
, ifC catChar [createTokenTransform "Char"]
, ifC catDouble [createTokenTransform "Double"]
, ifC catInteger [createTokenTransform "Integer"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
, userDefTokens
, ifC catIdent [createTokenTransform "Ident"]
]
@@ -254,7 +255,7 @@ createTokenTransformers cf = unlines $ concat
userDefTokens :: [String]
userDefTokens = [
- createTokenTransform name | (name, exp) <- tokenPragmas cf
+ createTokenTransform name | (name, _) <- tokenPragmas cf
]
@@ -263,7 +264,7 @@ createTokenTransform :: String -> String
createTokenTransform name = unlines
[ " @v_args(inline=True)"
, " def " ++ map toUpper name ++ "(self, token):"
- , " return " ++ name ++ "(token.value)"
+ , " return " ++ unkw name ++ "(token.value)"
]
@@ -316,40 +317,38 @@ nameCorresponds _ _ = error "Names can't be empty"
-- Creates a transformer for a rule with its corresponding define.
-makeDefineTransform ::
- CF -> (Int, Rul RFun, Define) -> String
-makeDefineTransform cf (n, defRule, defi) = unlines
+makeDefineTransform :: (Int, Rul RFun, Define) -> String
+makeDefineTransform (n, defRule, defi) = unlines
[ " @v_args(inline=True)"
, " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ expToDef env2 (defBody defi)
+ , " return d_" ++ name ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
, ""
]
where
name = (wpThing . defName) defi
sentForm = rhsRule defRule
- args = map fst (defArgs defi)
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
- env2 = zip args enumeratedVars
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
-- | Converts the production of a define, called an expression, to a
-- production for the parsing definition.
-expToDef :: [(String, String)] -> Exp -> String
-expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
-expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
- expToDef env recList
+expToDef :: CF -> Exp -> String
+expToDef cf (App "(:)" _ (e:[App "[]" _ _])) = expToDef cf e ++ "]"
+expToDef cf (App "(:)" _ (e:[recList])) = "[" ++ expToDef cf e ++ ", " ++
+ expToDef cf recList
expToDef _ (App "[]" _ _) = "[]"
-expToDef env (App fName _ exps) =
- fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
-expToDef env (Var s) = case lookup s env of
- Just p -> p
- Nothing -> error "Missing variable in define enviroment"
+expToDef cf (App fName _ exps)
+ | isLower (head fName) =
+ "d_" ++ fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+ | otherwise =
+ unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+expToDef _ (Var s) = unkw s
expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
-expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+expToDef _ (LitString s) = "String('" ++ s ++ "')"
-- A placeholder variable to store additional information, for say type
@@ -380,29 +379,6 @@ placeholderVariableClass = unlines
]
--- | Creates a parsing definition that points to all entrypoints.
-createCommonEntrypointDef :: CF -> String
-createCommonEntrypointDef cf = unlines
- [ "def p__Start(p):"
- , " '''"
- , " _Start : " ++ (translateToList . show . head) cats ++
- concat (map createCase (tail cats))
- , " '''"
- , " p[0] = p[1]"
- , ""
- , ""
- , "# Comment the below line to enable the '_Start' entrypoint (may yield"
- ++ " conflict warnings)."
- , "del p__Start"
- , ""
- ]
- where
- cats = (List1.toList . allEntryPoints) cf
-
- createCase :: Cat -> String
- createCase c = "\n | " ++ translateToList (show c)
-
-
-- | The value categories become abstract classes, for type hinting.
createValueCatClass :: String -> String
createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
@@ -412,10 +388,10 @@ createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
makePythonClass :: Rul RFun -> String
makePythonClass rule =
"@dataclass\n" ++
- "class " ++ name ++ ":\n" ++
+ "class " ++ className ++ ":\n" ++
if length cats == 0 then "\tpass\n" else classBody
where
- name = funName rule
+ className = unkw (funName rule)
sentForm = rhsRule rule
cats = lefts sentForm
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
@@ -429,7 +405,22 @@ makePythonClass rule =
-- | Creates the corresponding type hinting for some member variable.
strCatToPyTyping :: String -> String
-strCatToPyTyping s =
- if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
+strCatToPyTyping s = if strIsList s
+ then "_List['" ++ (unkw . tail . init) s ++ "']"
+ else unkw s
+
+
+-- | Creates functions for the defines.
+createDefineFunctions :: CF -> String
+createDefineFunctions cf = unlines
+ (map (createDefineFunction cf) (definitions cf))
+createDefineFunction :: CF -> Define -> String
+createDefineFunction cf d = unlines
+ [ "def d_" ++ (wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
+ , " return " ++ expToDef cf (defBody d)
+ ]
+ where
+ args = map (unkw . fst) (defArgs d)
+
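To make the Absyn.py generator above concrete, here is a hedged sketch of the output it might produce for a hypothetical rule ``EAdd. Exp ::= Exp "+" Exp`` and a define ``plus``; all names are illustrative, and the ``_ann_type`` slot mirrors the placeholder variable mentioned earlier::

    # Hypothetical sketch of generated Absyn.py content (names illustrative).
    from dataclasses import dataclass

    class Exp:  # value categories become empty classes, used for type hints
        pass

    @dataclass
    class EAdd:  # one dataclass per rule label; unkw appends "_" on keyword clash
        exp_1: Exp
        exp_2: Exp
        _ann_type: object = None  # extra annotation slot

    def d_plus(x, y):  # defines become plain "d_"-prefixed functions
        return EAdd(x, y)

    print(d_plus(Exp(), Exp()))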
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
index 20255c19..352f12c1 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -67,8 +67,8 @@ makePrintAST cf = concat
]
, if length (tokenNames cf) > 0
then unlines
- [ " case (" ++ intercalate " | " (map (++"()") (tokenNames cf))
- ++ "):"
+ [ " case (" ++ intercalate " | "
+ (map ((++ "()") . unkw) (tokenNames cf)) ++ "):"
, " return '\"' + str(ast) + '\"'"
]
else ""
@@ -76,7 +76,8 @@ makePrintAST cf = concat
, " return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n"
, "\n"
, " if len(vars(ast)) > 0:\n"
- , " return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
+ , " return '(' + ast.__class__.__name__ + ' ' + ' '.join(" ++
+ "[printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
, " else:\n"
, " return ast.__class__.__name__\n"
]
@@ -107,7 +108,7 @@ makeListDecon cf c = concat
]
where
name = show $ catOfList c
- listRulesForCat = rulesForCat cf c
+ listRulesForCat = [ r | r <- cfgRules cf, isParsable r, valCat r == c]
nilRule = case [r | r <- listRulesForCat, isNilFun r] of
[] -> Nothing
@@ -122,14 +123,14 @@ makeListDecon cf c = concat
-- List rules are of the form:
-- [C] ::= symbols.. C symbols.. [C]
-- The production, in Python, is concatenated recursively:
- -- symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols..
+ -- symbols.. + c(xs[0], 'C1') + symbols.. + listCDecon(xs[1:]) + symbols..
sentFormToArgs :: Int -> [Either Cat String] -> String
sentFormToArgs _ [] = "[]"
sentFormToArgs v (Right strOp:ecss) =
"['" ++ escapeChars strOp ++ "'] + " ++
sentFormToArgs v ecss
sentFormToArgs v (Left _:ecss)
- | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss
+ | v == 0 = "c(xs[0], '" ++ name ++ "') + " ++ sentFormToArgs (v+1) ecss
| v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
sentFormToArgs (v+1) ecss
| otherwise = error "A list production can max have C and [C]."
@@ -165,14 +166,5 @@ makeRenderC = unlines
, " def ident(i):"
, " return ' ' * iLevel"
, ""
- , " def removeTrailingWhitespace(tot):"
- , " i = len(tot)"
- , " while i > 0:"
- , " if tot[i] == ' ':"
- , " i -= 1"
- , " else:"
- , " break"
- , ""
, " return tot[:i]"
, ""
, " def oneEmptyLine(tot):"
@@ -211,7 +204,11 @@ makeRenderC = unlines
, " case ' ':"
, " tot += s"
, " case _:"
- , " tot += s + ' '"
+ , " if s[-1] == ' ':" -- To not extend separators of spaces.
+ , " tot = tot.rstrip()"
+ , " tot += s"
+ , " else:"
+ , " tot += s + ' '"
, ""
, " return tot"
]
@@ -221,7 +218,7 @@ makeRenderC = unlines
makeCoercCompare :: CF -> String
makeCoercCompare cf = concat
[ "cdict = {\n"
- , unlines (map (\(fs, cs) -> " " ++ fs ++ " : '" ++ cs ++ "',") scs)
+ , unlines (map (\(fs, cs) -> " " ++ unkw fs ++ " : '" ++ cs ++ "',") scs)
, "}"
]
where
@@ -278,7 +275,7 @@ makeLinFunc cf = unlines
]
, ifUsedThen catString
[ " case String():"
- , " return [ast, ' ']"
+ , " return [ast]"
]
, ifUsedThen catIdent
[ " case Ident():"
@@ -350,8 +347,8 @@ makeListEntrypointCase cf c = concat
]
where
constructors = if isTokenCat c
- then [show c ++ "()"]
- else map ((++ "()") . funName)
+ then [unkw (show c) ++ "()"]
+ else map ((++ "()") . unkw . funName)
[
r | r <- rulesForNormalizedCat cf (normCat c),
not (isCoercion r),
@@ -362,7 +359,7 @@ makeListEntrypointCase cf c = concat
-- Creates a case for a user defined literal, which inherits str.
makeSkeleTokenCase :: String -> String
makeSkeleTokenCase tokenName = concat
- [ " case " ++ tokenName ++ "():\n"
+ [ " case " ++ unkw tokenName ++ "():\n"
, " return [ast]"
]
@@ -371,7 +368,7 @@ makeSkeleTokenCase tokenName = concat
-- separator- and terminator-delimiters there are.
makeSkeleRuleCase :: Rul RFun -> String
makeSkeleRuleCase rule = concat
- [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ [ " case " ++ unkw fName ++ "(" ++ varNamesCommad ++ "):\n"
, " # " ++ (showEcss sentForm) ++ "\n"
, " return " ++ if (length args > 0)
then (intercalate " + " args)
@@ -382,8 +379,7 @@ makeSkeleRuleCase rule = concat
sentForm = rhsRule rule
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
-
- enumeratedVarNames = [render d | (c, d) <- lefts nvCats]
+ enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
varNamesCommad = if length enumeratedVarNames > 0
then addCommas (enumeratedVarNames ++ ["_ann_type"])
@@ -406,4 +402,5 @@ ecssAndVarsToList (Left c:ecss) (s:ss)
name = show $ catOfList c
ecssAndVarsToList (Right strOp:ecss) ss =
["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss
+ecssAndVarsToList ((Left _):_) [] = error "Missing variable name"
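The concatenated strings that build printAST above can be hard to read; the following is a hedged Python mirror of its core branches (lists, objects with fields, leaves), written for illustration only and omitting the token and literal cases of the real generated code::

    from dataclasses import dataclass

    @dataclass
    class EInt:
        integer_: int

    @dataclass
    class EAdd:
        exp_1: object
        exp_2: object

    def printAST(ast) -> str:
        # Lists print as bracketed, comma-separated elements.
        if isinstance(ast, list):
            return '[' + ', '.join(printAST(a) for a in ast) + ']'
        if not hasattr(ast, '__dict__'):
            return str(ast)  # bare literals; the generated code wraps these
        # Objects with fields print as "(ClassName field1 ...)", skipping
        # the _ann_type annotation slot.
        if len(vars(ast)) > 0:
            return ('(' + ast.__class__.__name__ + ' '
                    + ' '.join(printAST(vars(ast)[k])
                               for k in vars(ast) if k != '_ann_type') + ')')
        return ast.__class__.__name__

    print(printAST(EAdd(EInt(1), EInt(2))))  # (EAdd (EInt 1) (EInt 2))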
diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
index 5297fa02..26904764 100644
--- a/source/src/BNFC/Backend/Python/CFtoPySkele.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
@@ -11,16 +11,13 @@ import Data.Char (toLower)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
+import Data.List (intercalate)
-- | Entrypoint.
cf2PySkele :: String -> CF -> String
cf2PySkele pkgName cf = unlines
- [ "from ply.lex import lex"
- , "from ply.yacc import yacc"
- , "import sys"
- , "from " ++ pkgName ++ ".LexTokens import *"
- , "from " ++ pkgName ++ ".ParsingDefs import *"
- , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ ["from " ++ pkgName ++ ".Absyn import *"
+ , ""
, ""
, makeSkele cf
]
@@ -32,15 +29,16 @@ makeSkele :: CF -> String
makeSkele cf = unlines
[ "# Categories combined into one matcher"
, "def skeleMatcher(ast: object):"
- , "\tmatch ast:"
- , unlines skeleLiteralCases
- , unlines skeleTokenCases
- , unlines skeleRuleCases
- , "\t\tcase _:"
- , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')"
+ , ind 1 "match ast:"
+ , intercalate "\n" skeleLiteralCases
+ , intercalate "\n" skeleTokenCases
+ , intercalate "\n" skeleRuleCases
+ , ind 2 "case _:"
+ , ind 3 "raise Exception(str(ast.__class__) + ' unmatched')"
+ , ""
, ""
, "# Categories split into their own matchers"
- , unlines matchersOnCats
+ , unlines matchersOnCats
]
where
rules =
@@ -72,12 +70,13 @@ makeSkele cf = unlines
-- Creates a matcher for some value category.
makeMatcherOnCat :: (Cat, [Rul RFun]) -> String
-makeMatcherOnCat (c, rules) = unlines
+makeMatcherOnCat (c, rules) = unlines
[ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):"
- , "\tmatch " ++ varName ++ ":"
- , unlines cases
- ,"\t\tcase _:"
- ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ , ind 1 "match " ++ varName ++ ":"
+ , intercalate "\n" cases
+ , ind 2 "case _:"
+ , ind 3 "raise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ , ""
]
where
varName = map toLower (show c) ++ "_"
@@ -88,27 +87,23 @@ makeMatcherOnCat (c, rules) = unlines
-- | Creates a case for some rule.
makeSkeleRuleCase :: Rul RFun -> String
-makeSkeleRuleCase rule = concat
- [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
- , "\t\t\t# " ++ (showEcss sentForm) ++ "\n"
- , "\t\t\traise Exception('" ++ fName ++ " not implemented')"
+makeSkeleRuleCase rule = intercalate "\n"
+ [ ind 2 "case " ++ name ++ "(" ++ varNamesCommad ++ "):"
+ , ind 3 "# " ++ (showEcss sentForm)
+ , ind 3 "raise Exception('" ++ name ++ " not implemented')"
]
where
- funcRStr = funRule rule :: RString
- fName = wpThing funcRStr :: String
+ name = unkw (funName rule)
sentForm = rhsRule rule
-
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
-
enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
-
varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"])
-- | Creates a case for a user-defined token.
makeSkeleTokenCase :: String -> String
-makeSkeleTokenCase tokenName = concat
- [ "\t\tcase " ++ tokenName ++ "():\n"
- , "\t\t\traise Exception('not implemented')"
+makeSkeleTokenCase tokenName = intercalate "\n"
+ [ ind 2 "case " ++ unkw tokenName ++ "():"
+ , ind 3 "raise Exception('" ++ unkw tokenName ++ " not implemented')"
]
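For reference, the matchers emitted by makeMatcherOnCat and makeSkeleRuleCase look roughly like the sketch below, assuming a hypothetical Exp category with a single EAdd rule; the package name bnfcPyGenCalc and the field count are assumptions, not taken from the patch::

    from bnfcPyGenCalc.Absyn import *

    def matcherExp(exp_: Exp):
        match exp_:
            case EAdd(exp_1, exp_2, _ann_type):
                # Exp "+" Exp
                raise Exception('EAdd not implemented')
            case _:
                raise Exception(str(exp_.__class__) + ' unmatched')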
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
index 300c1d9f..f68abe13 100644
--- a/source/src/BNFC/Backend/Python/PyHelpers.hs
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -10,6 +10,12 @@ import Data.Char
import BNFC.CF
+-- Indents by four spaces
+ind :: Int -> String -> String
+ind 0 s = s
+ind n s = ind (n-1) (" " ++ s)
+
+
addCommas :: [String] -> String
addCommas ss = intercalate ", " ss
@@ -78,3 +84,53 @@ showEcss [] = ""
showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss)
showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
+
+-- | Adds an underscore if the string overlaps with a keyword.
+unkw :: String -> String
+unkw s = if s `elem` kwListWithSoftKeywords then s ++ "_" else s
+
+
+-- Python keywords and soft keywords; names overlapping with these get an
+-- extra underscore.
+kwListWithSoftKeywords :: [String]
+kwListWithSoftKeywords =
+ [ "False"
+ , "None"
+ , "True"
+ , "and"
+ , "as"
+ , "assert"
+ , "async"
+ , "await"
+ , "break"
+ , "class"
+ , "continue"
+ , "def"
+ , "del"
+ , "elif"
+ , "else"
+ , "except"
+ , "finally"
+ , "for"
+ , "from"
+ , "global"
+ , "if"
+ , "import"
+ , "in"
+ , "is"
+ , "lambda"
+ , "nonlocal"
+ , "not"
+ , "or"
+ , "pass"
+ , "raise"
+ , "return"
+ , "try"
+ , "while"
+ , "with"
+ , "yield"
+ , "_"
+ , "case"
+ , "match"
+ , "type"
+ ]
+
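As a concrete illustration of unkw: a rule label that collides with an entry in the list above cannot be used as a Python class name directly, so the generated class gets a trailing underscore. The grammar label ``True`` below is hypothetical::

    # Sketch: a label `True. BoolLit ::= "true"` cannot become `class True:`
    # in Python, so unkw renames the generated class to True_.
    from dataclasses import dataclass

    class BoolLit:
        pass

    @dataclass
    class True_:
        pass

    print(True_())  # True_()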
From 6eafcd6bfa31ad64beab7999d8295e19274dd6b9 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Mon, 6 Jan 2025 19:21:49 +0100
Subject: [PATCH 8/8] Retargeting to ANTLRv4
---
docs/user_guide.rst | 54 ++-
document/BNF_Converter_Python_Mode.html | 76 ++--
source/BNFC.cabal | 36 +-
source/src/BNFC/Backend/Python.hs | 81 +++--
source/src/BNFC/Backend/Python/Antlr4Utils.hs | 46 +++
.../BNFC/Backend/Python/CFtoAntlr4Lexer.hs | 187 ++++++++++
.../BNFC/Backend/Python/CFtoAntlr4Parser.hs | 342 ++++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 295 +--------------
.../Backend/Python/CFtoPyPrettyPrinter.hs | 82 +++--
source/src/BNFC/Backend/Python/PyHelpers.hs | 23 +-
testing/src/ParameterizedTests.hs | 2 +-
11 files changed, 810 insertions(+), 414 deletions(-)
create mode 100644 source/src/BNFC/Backend/Python/Antlr4Utils.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index c4e1e287..d68784c9 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -289,13 +289,13 @@ Python Backend
===============
The BNF Converter's Python Backend generates a Python frontend, that uses
-Lark, to parse input into an AST (abstract syntax tree).
+Antlr4, to parse input into an AST (abstract syntax tree).
-Lark and Python 3.10 or higher is needed.
+The Python package antlr4, the ANTLR4 jar, and Python 3.10 or higher are needed.
Example usage: ::
- bnfc --python Calc.cf
+ bnfc --python -m Calc.cf
.. list-table:: The result is a set of files:
@@ -304,19 +304,29 @@ Example usage: ::
* - Filename
- Description
+ * - bnfcPyGenCalc/CalcLexer.g4
+ - Provides the grammar for the lexer.
+ * - bnfcPyGenCalc/CalcParser.g4
+ - Provides the grammar for the parser.
* - bnfcPyGenCalc/Absyn.py
- Provides the classes for the abstract syntax.
- * - bnfcPyGenCalc/ParserDefs.py
- - Provides Lark with the information needed to build the lexer and parser.
* - bnfcPyGenCalc/PrettyPrinter.py
- Provides printing for both the AST and the linearized tree.
* - genTest.py
- A ready test-file, that uses the generated frontend to convert input into an AST.
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
+ * - Makefile
+ - The makefile, which uses an Antlr jar file to produce the lexer and parser for Python.
-Optionally one may with ``-m``` also create a makefile that contains the target
-"distclean" to remove the generated files.
+Make sure the ANTLR jar is accessible from the generated makefile and
+run the makefile. For example, on Linux, one can export the following
+variable from ``.profile``:
+
+``export ANTLR="$HOME/Downloads/antlr/antlr-4.13.2-complete.jar"``
+
+Subsequently run ``make``. The generated lexer and parser are placed inside
+the folder used above.
Testing the frontend
....................
@@ -337,16 +347,15 @@ and it's possible to just use an argument::
Caveats
.......
-Several entrypoints:
- The testfile genTest.py only uses the first entrypoint used by default. To
- use all entrypoints, set the start parameter to "start_". If the
- entrypoints cause reduce/reduce conflicts, a lark GrammarError will be
- produced.
+Maximum elements for hand-made lists:
+ If one defines custom rules for lists, such as::
+
+ (:) [C] ::= 'a' C 'b' [C] 'c'
-Results from the parameterized tests:
- While the Python backend generates working frontends for the example
- grammars, five "failures" and six "errors" among the regression
- tests are reported.
+ the Python backend cannot simplify the rule into an iterative form for
+ the parser, meaning at most about 1000 elements can be parsed before a
+ maximum recursion depth error is raised (a short illustration follows
+ these caveats). Using the terminator or separator pragmas works fine.
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
@@ -355,10 +364,21 @@ Skeleton code for using lists as entrypoints:
checking what type the elements have. Users are instead encouraged to use
non-list entrypoints.
-Using multiple separators
+Several entrypoints:
+ The testfile genTest.py only uses the first entrypoint by default.
+
+Using multiple separators:
Using multiple separators for the same category, such as below, generates
Python functions with overlapping names, causing runtime errors.::
separator Exp1 "," ;
separator Exp1 ";" ;
+Results from the parameterized tests:
+ One error among the regression tests is reported: the Java BNFC example
+ grammar contains mutually left-recursive rules.
+
+Escaped characters in haskell-hcr:
+ Attempting to parse ParCore.hcr from the BNFC example grammar
+ haskell-hcr yields errors for escaped characters.
+
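To make the recursion-depth caveat above concrete: the limit comes from CPython's call stack, not from the grammar. A minimal standalone illustration, independent of the generated parser, where each list element stands for one recursive descent step::

    import sys

    def nested(n):
        # Stands in for a recursive parse/visit: one Python frame per element.
        return 0 if n == 0 else 1 + nested(n - 1)

    print(sys.getrecursionlimit())  # typically 1000 on a stock CPython
    try:
        nested(10_000)
    except RecursionError as e:
        print('RecursionError:', e)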
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 4ffb46f6..ce939979 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -27,38 +27,42 @@ By Björn Werner
2024
The BNF Converter's Python Backend generates a Python frontend, that uses
- Lark, to parse input into an AST (abstract syntax tree).
+ Antlr4, to parse input into an AST (abstract syntax tree).
BNFC on Github:
https://github.com/BNFC/bnfc
- Lark github:
- https://github.com/lark-parser/lark
+ Antlr on Github:
+ https://github.com/antlr/antlr4
- Python 3.10 or higher is needed.
+ Requirements are: the jar file for ANTLRv4, the Python package antlr4, and
+ Python 3.10 or higher.
Usage
- bnfc --python NAME.cf
+ bnfc --python -m NAME.cf
-The result is a set of files:
+There should now exist the following files:
Filename: | Description: |
- bnfcGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+ bnfcPyGenNAME/NAMELexer.g4 | Provides the grammar for the lexer. |
- bnfcGenNAME/ParserDefs.py | Provides Lark with the information needed to build the lexer and parser. |
+ bnfcPyGenNAME/NAMEParser.g4 | Provides the grammar for the parser. |
- bnfcGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
+ bnfcPyGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+
+
+ bnfcPyGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
genTest.py | A ready test-file, that uses the generated frontend to convert input into an AST. |
@@ -67,7 +71,21 @@ Usage
skele.py | Provides skeleton code to deconstruct an AST, using structural pattern matching. |
-
+
+Make sure the ANTLR jar is accessible from the generated makefile and run the makefile. The generated lexer and parser are placed inside the folder used above.
+
+
+ For example, on Linux, export the following variable from .profile:
+
+
+export ANTLR="$HOME/Downloads/antlr/antlr-4.13.2-complete.jar"
+
+
+ After that it should be possible to run the makefile:
+
+
+ make
+
Testing the frontend
The following example uses a frontend that is generated from a C-like grammar.
@@ -143,13 +161,27 @@
Using the skeleton file
Known issues
+
+ Maximum elements for hand-made list rules:
+
+
+ If one defines custom rules for lists, such as:
+
+
+ (:) [C] ::= 'a' C 'b' [C] 'c'
+
+
+ the Python backend cannot simplify the rule into an iterative form for
+ the parser, meaning at most about 1000 elements can be parsed before a
+ maximum recursion depth error is raised. Using the terminator or separator pragmas should work fine.
+
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
skeleton code as it may confuse users if the grammar uses several different
- list categories, as a user may then try to pattern match lists without
+ list categories - as a user may then try to pattern match lists without
checking what type the elements have. Users are instead encouraged to use
non-list entrypoints.
@@ -169,8 +201,12 @@
for exp in listexp_:
...
+Several entrypoints:
+
+ The testfile genTest.py only uses the first entrypoint by default.
+
- Using multiple separators
+ Using multiple separators:
Using multiple separators for the same category, such as below, generates
@@ -180,19 +216,15 @@
separator Exp1 "," ;
separator Exp1 ";" ;
-Several entrypoints:
+
+Results from the parameterized tests:
+
- The testfile genTest.py only uses the first entrypoint used by default. To
- use all entrypoints, set the start parameter to "start_". If the
- entrypoints cause reduce/reduce conflicts, a lark GrammarError will be
- produced.
+ One error among the regression tests is reported: the Java BNFC example grammar contains mutually left-recursive rules.
-Results from the parameterized tests:
+ Escaped characters in haskell-hcr:
- While the Python backend generates working frontends for the example
- grammars, five "failures" and six "errors" among the regression
- tests are reported.
+ Attempting to parse ParCore.hcr from the haskell-hcr example BNFC grammar yields an error for escaped characters.
-
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index 9280a8f3..872f28fd 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -32,9 +32,8 @@ Description:
-- Support range when build with cabal
tested-with:
- GHC == 9.10.1
- GHC == 9.8.2
- GHC == 9.6.5
+ GHC == 9.8.1
+ GHC == 9.6.3
GHC == 9.4.8
GHC == 9.2.8
GHC == 9.0.2
@@ -44,6 +43,7 @@ tested-with:
GHC == 8.4.4
GHC == 8.2.2
GHC == 8.0.2
+ GHC == 7.10.3
extra-doc-files:
README.md
@@ -81,9 +81,6 @@ executable bnfc
other-modules:
-- Generated by cabal
Paths_BNFC
- autogen-modules:
- -- Generated by cabal
- Paths_BNFC
default-extensions:
-- Keep in alphabetical order.
LambdaCase
@@ -157,14 +154,6 @@ library
-- BNFC.Lex
-- -- Generated by happy
-- BNFC.Par
- -- 2023-11-03 We cannot add BNFC.{Lex,Par} as then the Lex.x and Par.y files
- -- are not bundled by cabal dist.
- -- Just make sure that there is no src/BNFC/{Lex,Par}.hs before running cabal sdist,
- -- otherwise we will end up with both Lex.hs and Lex.x (resp. Par.{hs,y})
- -- which will cause alex/happy to not be run, leading to build failures.
- autogen-modules:
- -- Generated by cabal
- Paths_BNFC
other-modules:
-- Generated by cabal
Paths_BNFC
@@ -266,6 +255,17 @@ library
BNFC.Backend.Java.RegToAntlrLexer
BNFC.Backend.Java.Utils
+ -- Python backend
+ BNFC.Backend.Python
+ BNFC.Backend.Python.CFtoPyAbs
+ BNFC.Backend.Python.CFtoPyPrettyPrinter
+ BNFC.Backend.Python.RegToFlex
+ BNFC.Backend.Python.PyHelpers
+ BNFC.Backend.Python.CFtoPySkele
+ BNFC.Backend.Python.CFtoAntlr4Lexer
+ BNFC.Backend.Python.CFtoAntlr4Parser
+ BNFC.Backend.Python.Antlr4Utils
+
-- XML backend
BNFC.Backend.XML
@@ -280,14 +280,6 @@ library
BNFC.Backend.TreeSitter.CFtoTreeSitter
BNFC.Backend.TreeSitter.RegToJSReg
- -- Python backend
- BNFC.Backend.Python
- BNFC.Backend.Python.CFtoPyAbs
- BNFC.Backend.Python.CFtoPyPrettyPrinter
- BNFC.Backend.Python.RegToFlex
- BNFC.Backend.Python.PyHelpers
- BNFC.Backend.Python.CFtoPySkele
-
----- Testing --------------------------------------------------------------
test-suite unit-tests
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index e0dc012d..3e0240dd 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
-
{-
BNF Converter: Python main file
Copyright (C) 2004 Author: Bjorn Werner
-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
module BNFC.Backend.Python (makePython) where
import Prelude hiding ((<>))
@@ -21,45 +21,54 @@ import BNFC.PrettyPrint
import Data.Char (toLower, isLetter)
import qualified BNFC.Backend.Common.Makefile as Makefile
+import BNFC.Backend.Python.CFtoAntlr4Lexer (cf2AntlrLex)
+import BNFC.Backend.Python.CFtoAntlr4Parser (cf2AntlrParse)
+
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
- let pkgName = "bnfcPyGen" ++ filter isLetter name
- let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
+ let pkgName = "bnfcPyGen" ++ filteredName
+ let abstractClasses = cf2PyAbs cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
- mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
mkPyFile "skele.py" skeletonCode
- mkPyFile "genTest.py" (pyTest pkgName cf)
- Makefile.mkMakefile (optMake opts) $ makefile pkgName (optMake opts)
+ mkPyFile "genTest.py" (pyTest pkgName filteredName cf)
+ Makefile.mkMakefile (optMake opts) $ makefile pkgName filteredName (optMake opts)
+
+ let (d, kwenv) = cf2AntlrLex filteredName cf
+ mkAntlrFile (pkgName ++ "/" ++ filteredName ++ "Lexer.g4") d
+ --cf2AntlrParse :: String -> String -> CF -> KeywordEnv -> String
+ let p = cf2AntlrParse filteredName (pkgName ++ ".Absyn") cf kwenv
+ mkAntlrFile (pkgName ++ "/" ++ filteredName ++ "Parser.g4") p
where
name :: String
name = lang opts
+ filteredName = filter isLetter name
mkPyFile x = mkfile x comment
-
+ mkAntlrFile x = mkfile x ("//" ++) -- "//" for comments
--- | A makefile with distclean and clean specifically for the testsuite. No
--- "all" is needed as bnfc has already generated the necessary Python files.
+-- | A makefile for the testsuite. The "all" target runs the ANTLR jar to
+-- generate the Python lexer and parser from the .g4 grammar files.
-makefile :: String -> Maybe String -> String -> Doc
-makefile pkgName optMakefileName basename = vcat
+makefile :: String -> String -> Maybe String -> String -> Doc
+makefile pkgName filteredName optMakefileName basename = vcat
[
Makefile.mkRule "all" []
- [ "@echo \"Doing nothing: No compilation of the parser needed.\"" ]
+ [ "java -jar $(ANTLR) -Dlanguage=Python3 " ++ pkgName ++ "/" ++ filteredName ++ "Lexer.g4"
+ , "java -jar $(ANTLR) -Dlanguage=Python3 " ++ pkgName ++ "/" ++ filteredName ++ "Parser.g4" ]
, Makefile.mkRule "clean" []
[ "rm -f parser.out parsetab.py" ]
, Makefile.mkRule "distclean" [ "vclean" ] []
, Makefile.mkRule "vclean" []
[ "rm -f " ++ unwords
[
- pkgName ++ "/ParsingDefs.py",
pkgName ++ "/Absyn.py",
pkgName ++ "/PrettyPrinter.py",
- pkgName ++ "/ParsingDefs.py.bak",
pkgName ++ "/Absyn.py.bak",
pkgName ++ "/PrettyPrinter.py.bak",
+ pkgName ++ "/" ++ filteredName ++ "*",
"skele.py",
"genTest.py",
"skele.py.bak",
@@ -86,11 +95,14 @@ comment x = "# " ++ x
-- Produces the content for the testing file, genTest.py.
-pyTest :: String -> CF -> String
-pyTest pkgName cf = unlines
+pyTest :: String -> String -> CF -> String
+pyTest pkgName filteredName cf = unlines
[ "import sys"
- , "from " ++ pkgName ++ ".ParsingDefs import *"
, "from " ++ pkgName ++ ".PrettyPrinter import printAST, lin, renderC"
+ , "from antlr4 import *"
+ , "from " ++ pkgName ++ "." ++ lexerName ++ " import " ++ lexerName
+ , "from " ++ pkgName ++ "." ++ parserName ++ " import " ++ parserName
+ , "from antlr4.error.ErrorListener import ErrorListener"
, ""
, "# Suggested input options:"
, "# python3 genTest.py < sourcefile"
@@ -107,18 +119,28 @@ pyTest pkgName cf = unlines
, " for line in sys.stdin:"
, " inp += line"
, ""
- , "def onError(e):"
- , " print(e)"
- , " print('Parse failed')"
- , " quit(1)"
+ , "class ThrowingErrorListener(ErrorListener):"
+ , " def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):"
+ , " raise Exception(f'Syntax error at line {line}, column {column}: {msg}')"
, ""
- , "# Creates the Lark parser with the given grammar. By default to the first"
- , "# entrypoint. Other entrypoints exist in ParsingDefs.py."
- , "parser = Lark(grammar, start='" ++ defaultEntrypoint ++ "', parser='lalr', lexer='basic', transformer=TreeTransformer())"
+ , "try:"
+ , " lexer = " ++ lexerName ++ "(InputStream(inp))"
+ , " lexer.removeErrorListeners()"
+ , " lexer.addErrorListener(ThrowingErrorListener())"
+ , ""
+ , " stream = CommonTokenStream(lexer)"
+ , " parser = " ++ parserName ++ "(stream)"
+ , " parser.removeErrorListeners()"
+ , " parser.addErrorListener(ThrowingErrorListener())"
+ , ""
+ , " tree = parser.start_" ++ defaultEntrypoint ++ "()"
+ , " ast = tree.result"
+ , " error = False"
+ , "except Exception as e:"
+ , " print(e)"
+ , " error = True"
, ""
- , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
- , "ast = parser.parse(inp, on_error=onError)"
- , "if ast:"
+ , "if not error and ast:"
, " print('Parse Successful!\\n')"
, " print('[Abstract Syntax]')"
, " print(printAST(ast))"
@@ -131,7 +153,8 @@ pyTest pkgName cf = unlines
, " quit(1)"
]
where
- defaultEntrypoint = map toLower
- ((translateToList . show . firstEntry) cf)
+ lexerName = filteredName ++ "Lexer"
+ parserName = filteredName ++ "Parser"
+ defaultEntrypoint = (translateToList . show . firstEntry) cf
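The genTest.py that pyTest emits boils down to the standard ANTLR4 Python runtime workflow. A hedged standalone sketch for a grammar named Calc follows; the package name bnfcPyGenCalc and the entrypoint rule start_Exp are assumptions tied to that example grammar, and the .g4 files must already have been compiled via ``make``::

    from antlr4 import InputStream, CommonTokenStream
    from antlr4.error.ErrorListener import ErrorListener
    from bnfcPyGenCalc.CalcLexer import CalcLexer
    from bnfcPyGenCalc.CalcParser import CalcParser
    from bnfcPyGenCalc.PrettyPrinter import printAST, lin, renderC

    class ThrowingErrorListener(ErrorListener):
        # ANTLR's default listeners only print; raising makes errors fatal.
        def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
            raise Exception(f'Syntax error at line {line}, column {column}: {msg}')

    lexer = CalcLexer(InputStream('(1 + 2) * 3'))
    lexer.removeErrorListeners()
    lexer.addErrorListener(ThrowingErrorListener())
    parser = CalcParser(CommonTokenStream(lexer))
    parser.removeErrorListeners()
    parser.addErrorListener(ThrowingErrorListener())
    tree = parser.start_Exp()         # generated start rule, first entrypoint
    print(printAST(tree.result))      # abstract syntax
    print(renderC(lin(tree.result)))  # linearized concrete syntax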
diff --git a/source/src/BNFC/Backend/Python/Antlr4Utils.hs b/source/src/BNFC/Backend/Python/Antlr4Utils.hs
new file mode 100644
index 00000000..33fea7ef
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/Antlr4Utils.hs
@@ -0,0 +1,46 @@
+{-
+ Description : Copied from the Java backend and modified for use with Python.
+ Modified by : Björn Werner
+-}
+
+module BNFC.Backend.Python.Antlr4Utils (getRuleName, getLabelName, startSymbol,
+ comment)
+ where
+
+import BNFC.CF
+import BNFC.Utils (mkName, NameStyle(..))
+import BNFC.Backend.Python.PyHelpers (pythonReserved)
+
+
+-- | Make an Antlr grammar file line comment
+comment :: String -> String
+comment = ("// " ++)
+
+
+-- Python keywords plus Antlr4 reserved keywords
+pythonAntlrReserved :: [String]
+pythonAntlrReserved = pythonReserved ++
+ [ "catch"
+ , "grammar"
+ , "throws"
+ ]
+
+
+-- | Appends an underscore if there is a clash with a Python or ANTLR keyword.
+-- E.g. "Grammar" clashes with ANTLR keyword "grammar" since
+-- we sometimes need the upper and sometimes the lower case version
+-- of "Grammar" in the generated parser.
+getRuleName :: String -> String
+getRuleName z
+ -- | firstLowerCase z `elem` ("grammar" : pythonReserved) = z ++ "_"
+ | z `elem` pythonAntlrReserved = z ++ "_"
+ | otherwise = z
+
+
+getLabelName :: Fun -> String
+getLabelName = mkName ["Rule"] CamelCase
+
+
+-- | Make a new entrypoint NT for an existing NT.
+startSymbol :: String -> String
+startSymbol = ("Start_" ++)
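For intuition, getRuleName behaves like the Python mirror below; the reserved set shown is a small illustrative subset of pythonReserved plus the three ANTLR words, not the full list::

    PYTHON_ANTLR_RESERVED = {
        'class', 'def', 'match', 'case',  # illustrative subset of pythonReserved
        'catch', 'grammar', 'throws',     # ANTLR4 reserved words
    }

    def get_rule_name(name: str) -> str:
        # Append an underscore on a clash, otherwise leave the name alone.
        return name + '_' if name in PYTHON_ANTLR_RESERVED else name

    assert get_rule_name('grammar') == 'grammar_'
    assert get_rule_name('Expr') == 'Expr'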
diff --git a/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs b/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
new file mode 100644
index 00000000..e1d48098
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
@@ -0,0 +1,187 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-
+ BNF Converter: Python Antlr4 Lexer generator
+ Copyright (C) 2015 Author: Gabriele Paganelli
+
+ Description : This module generates the Antlr4 input file.
+ Based on CFtoJLex15.hs
+
+ Author : Gabriele Paganelli (gapag@distruzione.org)
+ Created : 15 Oct, 2015
+
+ Edited for Python by
+ : Björn Werner
+ Modified : 30 Dec, 2024
+
+-}
+
+module BNFC.Backend.Python.CFtoAntlr4Lexer ( cf2AntlrLex ) where
+
+import Prelude hiding ((<>))
+
+import Text.PrettyPrint
+import BNFC.CF
+import BNFC.Backend.Java.RegToAntlrLexer
+import BNFC.Backend.Common.NamedVariables
+import BNFC.Backend.Python.Antlr4Utils (getRuleName)
+
+
+-- | Creates a lexer grammar.
+-- Since antlr token identifiers must start with an uppercase symbol,
+-- "Surrogate_id_SYMB_" is prepended to each such identifier.
+-- This introduces a risk of clashes if somebody uses the same identifier for
+-- user defined tokens; this is not handled.
+-- Returns the environment because the parser uses it.
+cf2AntlrLex :: String -> CF -> (Doc, KeywordEnv)
+cf2AntlrLex lang cf = (,env) $ vcat
+ [ prelude lang
+ , cMacros
+ -- unnamed symbols (those in quotes, not in token definitions)
+ , lexSymbols env
+ , restOfLexerGrammar cf
+ ]
+ where
+ env = zip (cfgSymbols cf ++ reservedWords cf) $
+ map (("Surrogate_id_SYMB_" ++) . show) [0 :: Int ..]
+
+
+-- | File prelude
+prelude :: String -> Doc
+prelude lang = vcat
+ [ "// Lexer definition for use with Antlr4"
+ , "lexer grammar" <+> text lang <> "Lexer;"
+ ]
+
+
+-- For now all categories are included.
+-- Ideally only the ones that are used should be generated.
+cMacros :: Doc
+cMacros = vcat
+ [ "// Predefined regular expressions in BNFC"
+ , frg "LETTER : CAPITAL | SMALL"
+ , frg "CAPITAL : [A-Z\\u00C0-\\u00D6\\u00D8-\\u00DE]"
+ , frg "SMALL : [a-z\\u00DF-\\u00F6\\u00F8-\\u00FF]"
+ , frg "DIGIT : [0-9]"
+ ]
+ where frg a = "fragment" <+> a <+> ";"
+
+
+escapeChars :: String -> String
+escapeChars = concatMap escapeCharInSingleQuotes
+
+
+-- |
+-- >>> lexSymbols [("foo","bar")]
+-- bar : 'foo' ;
+-- >>> lexSymbols [("\\","bar")]
+-- bar : '\\' ;
+-- >>> lexSymbols [("/","bar")]
+-- bar : '/' ;
+-- >>> lexSymbols [("~","bar")]
+-- bar : '~' ;
+lexSymbols :: KeywordEnv -> Doc
+lexSymbols ss = vcat $ map transSym ss
+ where
+ transSym (s,r) = text r <> " : '" <> text (escapeChars s) <> "' ;"
+
+
+-- | Writes rules for user defined tokens, and, if used, the predefined
+-- BNFC tokens.
+restOfLexerGrammar :: CF -> Doc
+restOfLexerGrammar cf = vcat
+ [ lexComments (comments cf)
+ , ""
+ , userDefTokens
+ , ifString strdec
+ , ifChar chardec
+ , ifC catDouble
+ [ "// Double predefined token type"
+ , "DOUBLE : DIGIT+ '.' DIGIT+ ('e' '-'? DIGIT+)?;"
+ ]
+ , ifC catInteger
+ [ "//Integer predefined token type"
+ , "INTEGER : DIGIT+;"
+ ]
+ , ifC catIdent
+ [ "// Identifier token type"
+ , "fragment"
+ , "IDENTIFIER_FIRST : LETTER | '_';"
+ , "IDENT : IDENTIFIER_FIRST (IDENTIFIER_FIRST | DIGIT)*;"
+ ]
+ , "// Whitespace"
+ , "WS : (' ' | '\\r' | '\\t' | '\\n' | '\\f')+ -> skip;"
+ , "// Escapable sequences"
+ , "fragment"
+ , "Escapable : ('\"' | '\\\\' | 'n' | 't' | 'r' | 'f');"
+ , "ErrorToken : . ;"
+ , ifString stringmodes
+ , ifChar charmodes
+ ]
+ where
+ ifC cat s = if isUsedCat cf (TokenCat cat) then vcat s else ""
+ ifString = ifC catString
+ ifChar = ifC catChar
+ strdec = [ "// String token type"
+ , "STRING : '\"' -> more, mode(STRINGMODE);"
+ ]
+ chardec = ["CHAR : '\\'' -> more, mode(CHARMODE);"]
+ userDefTokens = vcat
+ [ text (getRuleName name) <> " : " <> text (printRegJLex exp) <> ";"
+ | (name, exp) <- tokenPragmas cf ]
+ stringmodes = [ "mode STRESCAPE;"
+ , "STRESCAPED : Escapable -> more, popMode ;"
+ , "mode STRINGMODE;"
+ , "STRINGESC : '\\\\' -> more , pushMode(STRESCAPE);"
+ , "STRINGEND : '\"' -> type(STRING), mode(DEFAULT_MODE);"
+ , "STRINGTEXT : ~[\"\\\\] -> more;"
+ ]
+ charmodes = [ "mode CHARMODE;"
+ , "CHARANY : ~['\\\\] -> more, mode(CHAREND);"
+ , "CHARESC : '\\\\' -> more, pushMode(CHAREND),pushMode(ESCAPE);"
+ , "mode ESCAPE;"
+ , "ESCAPED : (Escapable | '\\'') -> more, popMode ;"
+ , "mode CHAREND;"
+ , "CHARENDC : '\\'' -> type(CHAR), mode(DEFAULT_MODE);"
+ ]
+
+
+-- | Stores multi and single line comment rules.
+lexComments :: ([(String, String)], [String]) -> Doc
+lexComments ([],[]) = ""
+lexComments (m,s) = vcat
+ (prod "COMMENT_antlr_builtin" lexSingleComment s ++
+ prod "MULTICOMMENT_antlr_builtin" lexMultiComment m )
+ where
+ prod bg lc ty = [bg, ": ("] ++ punctuate "|" (map lc ty) ++ skiplex
+ skiplex = [") -> skip;"]
+
+
+-- | Create lexer rule for single-line comments.
+--
+-- >>> lexSingleComment "--"
+-- '--' ~[\r\n]* (('\r'? '\n')|EOF)
+--
+-- >>> lexSingleComment "\""
+-- '"' ~[\r\n]* (('\r'? '\n')|EOF)
+lexSingleComment :: String -> Doc
+lexSingleComment c =
+ "'" <>text (escapeChars c) <> "' ~[\\r\\n]* (('\\r'? '\\n')|EOF)"
+
+
+-- | Create lexer rule for multi-lines comments.
+--
+-- There might be a possible bug here if a language includes 2 multi-line
+-- comments. They could possibly start a comment with one character and end it
+-- with another. However this seems rare.
+--
+-- >>> lexMultiComment ("{-", "-}")
+-- '{-' (.)*? '-}'
+--
+-- >>> lexMultiComment ("\"'", "'\"")
+-- '"\'' (.)*? '\'"'
+lexMultiComment :: (String, String) -> Doc
+lexMultiComment (b,e) = "'" <> text (escapeChars b)
+ <> "' (.)*? '"<> text (escapeChars e)
+ <> "'"
diff --git a/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs b/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
new file mode 100644
index 00000000..14357e1c
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
@@ -0,0 +1,342 @@
+{-
+ BNF Converter: Antlr4 Python Generator
+ Copyright (C) 2004 Author: Markus Forsberg, Michael Pellauer,
+ Bjorn Bringert
+
+ Description : This module generates the ANTLR .g4 input file for the
+ Python backend. It follows the same basic structure
+ of CFtoHappy.
+
+ Author : Gabriele Paganelli (gapag@distruzione.org)
+ Created : 15 Oct, 2015
+
+ Edited for Python 2024 by
+ : Björn Werner
+-}
+
+{-# LANGUAGE LambdaCase #-}
+
+module BNFC.Backend.Python.CFtoAntlr4Parser ( cf2AntlrParse ) where
+
+import Data.Foldable ( toList )
+import Data.List ( intercalate )
+import Data.Maybe
+import BNFC.CF
+import BNFC.Utils ( (+++), (+.+), applyWhen )
+import BNFC.Backend.Python.Antlr4Utils
+import BNFC.Backend.Common.NamedVariables
+import BNFC.Backend.Python.PyHelpers
+import Data.Either (lefts, rights, isLeft)
+
+
+-- | A definition of a non-terminal by all its rhss,
+-- together with parse actions.
+data PDef = PDef
+ { _pdNT :: Maybe String
+ -- ^ If given, the name of the lhss. Usually computed from 'pdCat'.
+ , _pdCat :: Cat
+ -- ^ The category to parse.
+ , _pdAlts :: [(Pattern, Action, Maybe Fun)]
+ -- ^ The possible rhss with actions. If 'null', skip this 'PDef'.
+ -- Where 'Nothing', skip ANTLR rule label.
+ }
+type Rules = [PDef]
+type Pattern = String
+type Action = String
+type MetaVar = (String, Cat)
+
+
+-- | Creates the ANTLR parser grammar for this CF.
+-- The environment comes from CFtoAntlr4Lexer.
+cf2AntlrParse :: String -> String -> CF -> KeywordEnv -> String
+cf2AntlrParse lang packageAbsyn cf env = unlines $ concat
+ [ [ header
+ , tokens
+ , importAbs
+ , ""
+ -- Generate start rules [#272]
+ -- _X returns [ dX result ] : x=X EOF { $result = $x.result; }
+ , prRules packageAbsyn $ map entrypoint $ toList $ allEntryPoints cf
+ -- Generate regular rules
+ , prRules packageAbsyn $ rulesForAntlr4 packageAbsyn cf env
+ ]
+ ]
+ where
+ header :: String
+ header = unlines
+ [ "// Parser definition for use with ANTLRv4"
+ , "parser grammar" +++ lang ++ "Parser;"
+ ]
+ tokens :: String
+ tokens = unlines
+ [ "options {"
+ , " tokenVocab = " ++ lang ++ "Lexer;"
+ , "}"
+ ]
+ importAbs :: String
+ importAbs = unlines
+ [ "@parser::header {import " ++ packageAbsyn
+ , "}"
+ ]
+
+
+-- | Generate start rule to help ANTLR.
+--
+-- @start_X returns [ X result ] : x=X EOF { $result = $x.result; } # Start_X@
+--
+entrypoint :: Cat -> PDef
+entrypoint cat =
+ PDef (Just nt) cat [(pat, act, fun)]
+ where
+ nt = firstLowerCase $ startSymbol $ identCat cat
+ pat = "x=" ++ catToNT cat +++ "EOF"
+ act = "$result = $x.result;"
+ -- No ANTLR Rule label, ("Start_" ++ identCat cat) conflicts with lhs.
+ fun = Nothing
+
+
+-- | The following functions are a (relatively) straightforward translation
+-- of the ones in CFtoHappy.hs
+rulesForAntlr4 :: String -> CF -> KeywordEnv -> Rules
+rulesForAntlr4 packageAbsyn cf env = map mkOne getrules
+ where
+ getrules = ruleGroups cf
+ mkOne (cat,rules) = constructRule packageAbsyn cf env rules cat
+
+
+-- | Aids the pattern constructor for lists
+data ListType = Term | Sep | None
+ deriving Eq
+
+
+-- | For every non-terminal, we construct a set of rules. A rule is a
+-- sequence of terminals and non-terminals, and an action to be performed.
+-- Complete sets of separator or terminator rules are treated separately, as
+-- the default recursive parsing may reach the maximum recursion depth
+-- in Python. Cases of multiple sets of rules are not considered.
+constructRule :: String -> CF -> KeywordEnv -> [Rule] -> NonTerminal -> PDef
+constructRule packageAbsyn cf env rules nt
+ | termOrSep /= None = PDef Nothing nt $
+ (if oneNilFun then
+ [ (" /* empty */ "
+ , "$result=[]"
+ , Nothing
+ )
+ ]
+ else
+ []
+ ) ++
+ [ ( generateListPatterns packageAbsyn env
+ (rhsRule (head consFuns)) termOrSep oneNilFun
+ , "# actions embedded in pattern"
+ , Nothing
+ )
+ ]
+ | otherwise = PDef Nothing nt $
+ [ ( p
+ , generateAction packageAbsyn nt (funRule r) m b
+ , Nothing -- labels not needed for BNFC-generated AST parser
+ )
+ | (index, r0) <- zip [1..] rules
+ , let b = isConsFun (funRule r0) && elem (valCat r0) (cfgReversibleCats cf)
+ , let r = applyWhen b revSepListRule r0
+ , let (p,m0) = generatePatterns index env r
+ , let m = applyWhen b reverse m0
+ ]
+ where
+ -- Figures out if the rules are well formed list rules (using the
+ -- separator or terminator pragmas).
+ nilFuns = filter isNilFun rules
+ oneFuns = filter isOneFun rules
+ consFuns = filter isConsFun rules
+
+ noNilFuns = length nilFuns == 0
+ noOneFuns = length oneFuns == 0
+
+ oneNilFun = length nilFuns == 1
+ oneOneFun = length oneFuns == 1
+ oneConsFun = length consFuns == 1
+
+ onlyMiddle :: [Either Cat String] -> Bool
+ onlyMiddle ecs = all isLeft [head ecs, last ecs]
+
+ noStrings :: [Either Cat String] -> Bool
+ noStrings ecs = length (rights ecs) == 0
+
+ -- Terminator:
+ -- []
+ -- (:) C ... [C]
+ isTerminator = oneNilFun && noOneFuns && oneConsFun &&
+ (noStrings . rhsRule . head) nilFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ -- Terminator nonempty:
+ -- (:[]) C ...
+ -- (:) C ... [C]
+ isTerminatorNonempty = noNilFuns && oneOneFun && oneConsFun &&
+ (isLeft . head . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns &&
+ (rights . rhsRule . head) oneFuns == ((rights . rhsRule . head) consFuns)
+
+ -- Separator:
+ -- []
+ -- (:[]) C
+ -- (:) C ... [C]
+ isSeparator = oneNilFun && oneOneFun && oneConsFun &&
+ (noStrings . rhsRule . head) nilFuns &&
+ (noStrings . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ -- Separator nonempty:
+ -- (:[]) C
+ -- (:) C ... [C]
+ isSeparatorNonempty = noNilFuns && oneOneFun && oneConsFun &&
+ (noStrings . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ termOrSep
+ | isTerminator || isTerminatorNonempty = Term
+ | isSeparator || isSeparatorNonempty = Sep
+ | otherwise = None
+
+
+-- | Generates a string containing the semantic action.
+generateAction :: IsFun f => String -> NonTerminal -> f -> [MetaVar]
+ -> Bool -- ^ Whether the list should be reversed or not.
+ -- Only used if this is a list rule.
+ -> Action
+generateAction packageAbsyn _ f ms rev
+ | isNilFun f = "$result =" ++ c ++ ";"
+ | isOneFun f = "$result =" ++ c ++ "; $result.append(" ++ p_1 ++ ")"
+ | isConsFun f = "$result =" ++ p_2 ++ "; $result." ++ add p_1
+ | isCoercion f = "$result = " ++ p_1 ++ ";"
+ | isDefinedRule f =
+ "$result = " ++ packageAbsyn ++ "." ++ sanitize (funName f)
+ ++ "(" ++ intercalate "," (map (resultvalue packageAbsyn) ms) ++ ");"
+ | otherwise = "$result = " ++ c
+ ++ "(" ++ intercalate "," (map (resultvalue packageAbsyn) ms) ++ ");"
+ where
+ sanitize = getRuleName
+ c = if isNilFun f || isOneFun f || isConsFun f
+ then "[]"
+ else packageAbsyn ++ "." ++ sanitize (funName f)
+ p_1 = resultvalue packageAbsyn $ ms!!0
+ p_2 = resultvalue packageAbsyn $ ms!!1
+ add p = (if rev then "append(" else "insert(0, ") ++ p ++ ")"
+
+
+-- | Gives the abstract syntax constructor for a cat.
+resultvalue :: String -> MetaVar -> String
+resultvalue packageAbsyn (n,c) = case c of
+ TokenCat "Double" -> concat [ packageAbsyn ++ ".Double(", txt, ")" ]
+ TokenCat "Integer" -> concat [ packageAbsyn ++ ".Integer(" , txt, ")" ]
+ TokenCat "Char" -> packageAbsyn ++ ".Char(" ++ txt ++ ")"
+ TokenCat "String" -> packageAbsyn ++ ".String(" ++ txt ++ ")"
+ TokenCat "Ident" -> concat [ packageAbsyn, ".Ident(", txt, ")" ]
+ TokenCat s -> packageAbsyn ++ "." ++ unkw s ++ "(" ++ txt ++ ")"
+ _ -> concat [ "$", n, ".result" ]
+ where
+ txt = '$':n +.+ "text"
+
+
+-- | Generate patterns and a set of metavariables indicating
+-- where in the pattern the non-terminals occur.
+-- >>> generatePatterns 2 [] $ npRule "myfun" (Cat "A") [] Parsable
+-- (" /* empty */ ",[])
+-- >>> generatePatterns 3 [("def", "_SYMB_1")] $ npRule "myfun" (Cat "A") [Right "def", Left (Cat "B")] Parsable
+-- ("_SYMB_1 p_3_2=b",[("p_3_2",B)])
+generatePatterns :: Int -> KeywordEnv -> Rule -> (Pattern,[MetaVar])
+generatePatterns ind env r =
+ case rhsRule r of
+ [] -> (" /* empty */ ", [])
+ its -> ( unwords $ mapMaybe (uncurry mkIt) nits
+ , [ (var i, cat) | (i, Left cat) <- nits ]
+ )
+ where
+ nits = zip [1 :: Int ..] its
+ var i = "p_" ++ show ind ++"_"++ show i
+ mkIt i = \case
+ Left c -> Just $ var i ++ "=" ++ catToNT c
+ Right s -> lookup s env
+
+
+-- | Nonempty patterns with embedded semantic actions, to match:
+-- Separator:
+-- C (... C)*
+-- Terminator:
+-- (C ...)+
+-- A terminator, for example, is turned into the pattern:
+-- {init list action} ( p_1_2=C {append action} )+
+-- Note that for separators with empty, consFun empty is a possible derivation,
+-- meaning a separator can end with delims:
+-- C (... C)* (...)?
+generateListPatterns :: String -> KeywordEnv -> [Either Cat String] ->
+ ListType -> Bool -> Pattern
+generateListPatterns packageAbsyn env ecss termOrSep oneNilFun =
+ case termOrSep of
+ Sep -> p1 ++ " {" ++ a1 ++ "} (" ++ delims ++ " " ++ p2 ++
+ " {" ++ a2 ++ "})*" ++ if oneNilFun then "(" ++ delims ++ ")?" else ""
+ Term -> "{$result=[];} (" ++ p2 ++ " " ++ delims ++ " {" ++ a2 ++ "})+"
+ None -> error "Neither Term nor Sep"
+ where
+ c = head (lefts ecss)
+ p1 = "p_1_1=" ++ catToNT c
+ p2 = "p_1_2=" ++ catToNT c
+
+ a1 = "$result = [" ++ resultvalue packageAbsyn ("p_1_1", c) ++ "]"
+ a2 = "$result.append(" ++ resultvalue packageAbsyn ("p_1_2", c) ++ ")"
+
+ delims = unwords (mapMaybe (\s -> lookup s env) (rights ecss))
+
+
+-- | Converts a cat to a string; an underscore is appended for keywords.
+catToNT :: Cat -> String
+catToNT = \case
+ TokenCat "Ident" -> "IDENT"
+ TokenCat "Integer" -> "INTEGER"
+ TokenCat "Char" -> "CHAR"
+ TokenCat "Double" -> "DOUBLE"
+ TokenCat "String" -> "STRING"
+ c | isTokenCat c -> getRuleName $ identCat c
+ | otherwise -> getRuleName $ firstLowerCase $ identCat c
+
+
+-- | Puts together the pattern and actions and returns a string containing all
+-- the rules.
+prRules :: String -> Rules -> String
+prRules packabs = concatMap $ \case
+
+ -- No rules: skip.
+ PDef _mlhs _nt [] -> ""
+
+ -- At least one rule: print!
+ PDef mlhs nt (rhs : rhss) -> unlines $ concat
+
+ -- The definition header: lhs and type.
+ [ [ unwords [ fromMaybe nt' mlhs
+ , "returns" , "[" , packabs+.+normcat , "result" , "]"
+ ]
+ ]
+ -- The first rhs.
+ , alternative " :" rhs
+ -- The other rhss.
+ , concatMap (alternative " |") rhss
+ -- The definition footer.
+ , [ " ;" ]
+ ]
+ where
+ alternative sep (p, a, label) = concat
+ [ [ concat [ sep , p ] ]
+ , [ concat [ " {" , a , "}" ] ]
+ , [ concat [ " #" , antlrRuleLabel l ] | Just l <- [label] ]
+ ]
+ catid = identCat nt
+ normcat = identCat (normCat nt)
+ nt' = getRuleName $ firstLowerCase catid
+ antlrRuleLabel :: Fun -> String
+ antlrRuleLabel fnc
+ | isNilFun fnc = catid ++ "_Empty"
+ | isOneFun fnc = catid ++ "_AppendLast"
+ | isConsFun fnc = catid ++ "_PrependFirst"
+ | isCoercion fnc = "Coercion_" ++ catid
+ | otherwise = getLabelName fnc
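Putting constructRule, generateListPatterns and prRules together: for a plain nonempty ``separator Exp ","`` (with ',' mapped to the surrogate token Surrogate_id_SYMB_0), the emitted ANTLR rule has roughly the shape below. Shown as a Python string for illustration; exact spacing, metavariable numbering, and the Absyn package path are assumptions::

    # Hypothetical output shape (names and numbering illustrative).
    list_rule = '''
    listExp returns [ bnfcPyGenCalc.Absyn.ListExp result ]
     : p_1_1=exp {$result = [$p_1_1.result]}
       (Surrogate_id_SYMB_0 p_1_2=exp {$result.append($p_1_2.result)})*
     ;
    '''
    print(list_rule)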
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index dd346984..6840453a 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -6,33 +6,18 @@
-}
module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
-import Data.List ( nub, intercalate )
+import Data.List (nub)
+import Data.Char (isLower)
+import Data.Either (lefts)
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
-import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
-import Data.Either (lefts)
-import Data.Char (toLower, toUpper, isLower)
-import qualified Data.List.NonEmpty as List1
--- | The result is ParsingDefs.py & Absyn.py
-cf2PyAbs
- :: String
- -> CF -- ^ Grammar.
- -> (String, String) -- ParsingDefs.py, Absyn.py.
-cf2PyAbs pkgName cf = ( unlines
- [ "from lark import Lark, Transformer, v_args"
- , "from dataclasses import dataclass"
- , "from " ++ pkgName ++ ".Absyn import *"
- , ""
- , createGrammar cf
- , createTransformer cf
- , createDefineFunctions cf
- , ""
- ]
- , unlines
+-- | Produces the content for Absyn.py
+cf2PyAbs :: CF -> String
+cf2PyAbs cf = unlines
["from typing import List as _List"
,"# Value categories (no coercsions):"
, unlines valueCatsClasses
@@ -42,8 +27,9 @@ cf2PyAbs pkgName cf = ( unlines
,"# Rules:"
,"from dataclasses import dataclass, field"
,"\n" ++ (unlines dataClasses)
+ , ""
+ , createDefineFunctions cf
]
- )
where
rules = cfgRules cf
@@ -71,267 +57,6 @@ cf2PyAbs pkgName cf = ( unlines
valueCatsClasses = map createValueCatClass valueCatNames
--- Creates a grammar for Lark. Not that it is a real string (r"...").
-createGrammar :: CF -> String
-createGrammar cf = unlines
- [ "grammar = r\"\"\""
- , " ?start_: " ++ entryOrClause
- , ""
- , unlines orClauses
- , larkLiterals cf
- , unlines singleComments
- , unlines multiComments
- , " %import common.WS"
- , " %ignore WS"
- , "\"\"\""
- ]
- where
- aCats = reallyAllCats cf
- rs = cfgRules cf
-
- enumeratedRules :: [(Int, Rul RFun)]
- enumeratedRules = enumerateAllDefinedRules rs 1 []
- orClauses = map (createOrClause enumeratedRules) aCats
-
- (multiMatchers, singleMatchers) = comments cf
- singleComments = map createLineCommentMatcher singleMatchers
- multiComments = map createMultiLineCommentMatcher multiMatchers
-
- strListEntryPoints = map ((map toLower) . translateToList . show)
- ((List1.toList . allEntryPoints) cf)
- entryOrClause = intercalate "\n | " strListEntryPoints
-
-
--- Enumerates all (only defined relevant) rules to prevent naming overlap.
-enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
- -> [(Int, Rul RFun)]
-enumerateAllDefinedRules [] _ irs = irs
-enumerateAllDefinedRules (r:rs) n irs
- | isDefinedRule r = enumerateAllDefinedRules rs (n+1) (irs ++ [(n, r)])
- | otherwise = enumerateAllDefinedRules rs n (irs ++ [(0, r)])
-
-
--- Creates an or clause with all rules for a given category.
-createOrClause :: [(Int, Rul RFun)] -> Cat -> String
-createOrClause irs c = unlines
- [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
- intercalate "\n | "
- (map createProdAndNameForRule catsIrs)
- ]
- where
- catsIrs = [(n, removeWhiteSpaceSeparators r) | (n, r) <- irs,
- valCat r == c, isParsable r]
-
-
--- Creates an entry for an or clause.
-createProdAndNameForRule :: (Int, Rul RFun) -> String
-createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
- if (not (isCoercion r)) then " -> " ++ map toLower name else ""
- where
- name
- | isNilFun r = "nil" ++ (identCat . valCat) r
- | isOneFun r = "one" ++ (identCat . valCat) r
- | isConsFun r = "cons" ++ (identCat . valCat) r
- | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
- | otherwise = "r_" ++ map toLower (funName r) ++ toOrd (funName r)
-
-
--- Creates the literals for a grammar for Lark. Priority is set after the
--- dot, such as "Name.PRIO". For literals with the same priority, it appears
--- that Lark (with basic mode) prioritizes the longest regular
--- expression, not the longest matched literal.
-larkLiterals :: CF -> String
-larkLiterals cf = unlines $ concat
- [
- ifC catString [createLiteral "String.1" "\"(\\\\.|[^\"])*\""]
- , ifC catChar [createLiteral "Char.1" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLiteral "Double.1" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLiteral "Integer.1" "\\d+"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14.
- , userDefTokens
- , ifC catIdent [createLiteral "Ident" "[A-Za-z_]\\w*"]
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createLiteral name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
- ]
-
- createLiteral :: String -> String -> String
- createLiteral name regex =
- " " ++ map toUpper name ++ ": /" ++ regex ++ "/"
-
-
--- Creates the class transformer, where each member method tells Lark how
--- to transform some parsed node in the tree.
-createTransformer :: CF -> String
-createTransformer cf = unlines
- [ "#transformer"
- , "class TreeTransformer(Transformer):"
- , unlines (map createRuleTransform rs)
- , unlines (map makeDefineTransform enumeratedRDs)
- , unlines (map createListTransform listRules)
- , createTokenTransformers cf
- ]
- where
- enumeratedRules :: [(Int, Rul RFun)]
- enumeratedRules = enumerateAllDefinedRules (cfgRules cf) 1 []
-
- rs = [r | r <- cfgRules cf
- , not (isCoercion r)
- , not (isNilCons r)
- , not (isDefinedRule r)]
- listRules = [r | r <- cfgRules cf, isNilCons r]
-
- enumeratedRDs = [(n, r, d) | (n, r) <- enumeratedRules, d <- definitions cf
- , not (isCoercion r)
- , not (isNilCons r)
- , isDefinedRule r
- , nameCorresponds ((wpThing . defName) d) (funName r)]
-
-
--- Creates a transform for a rule
-createRuleTransform :: Rul RFun -> String
-createRuleTransform r = unlines
- [ " @v_args(inline=True)"
- , " def r_" ++ nameWithUnicode ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ className ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
- ]
- where
- nameWithUnicode = map toLower (funName r) ++ toOrd (funName r)
- className = unkw (funName r)
- sentForm = rhsRule r
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
-
--- Creates a transform for a list rule.
-createListTransform :: Rul RFun -> String
-createListTransform r = unlines
- [ " @v_args(inline=True)"
- , " def " ++ map toLower name ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ args
- ]
- where
- name
- | isNilFun r = "nil" ++ (identCat . valCat) r
- | isOneFun r = "one" ++ (identCat . valCat) r
- | isConsFun r = "cons" ++ (identCat . valCat) r
- | otherwise = funName r
-
- sentForm = rhsRule r
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
- args :: String
- args
- | isNilFun r = "[]"
- | isOneFun r = "[" ++ head enumeratedVars ++ "]"
- | isConsFun r = "[" ++ head enumeratedVars ++ "] + " ++
- last enumeratedVars
- | otherwise = error "Should be a list function"
-
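The three list transforms amount to the following, sketched for [Exp] with approximated method names (the real names are the lowered nil/one/cons names computed above):

    from lark import Transformer, v_args

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def nillistexp(self):            # [Exp] ::=
            return []

        @v_args(inline=True)
        def onelistexp(self, e):         # [Exp] ::= Exp
            return [e]

        @v_args(inline=True)
        def conslistexp(self, e, es):    # [Exp] ::= Exp "," [Exp]
            return [e] + es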
-
--- Creates the transformer functions for the tokens.
-createTokenTransformers :: CF -> String
-createTokenTransformers cf = unlines $ concat
- [
- ifC catString [createTokenTransform "String"]
- , ifC catChar [createTokenTransform "Char"]
- , ifC catDouble [createTokenTransform "Double"]
- , ifC catInteger [createTokenTransform "Integer"]
- , userDefTokens
- , ifC catIdent [createTokenTransform "Ident"]
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createTokenTransform name | (name, _) <- tokenPragmas cf
- ]
-
-
--- Creates a transform for a token.
-createTokenTransform :: String -> String
-createTokenTransform name = unlines
- [ " @v_args(inline=True)"
- , " def " ++ map toUpper name ++ "(self, token):"
- , " return " ++ unkw name ++ "(token.value)"
- ]
-
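Each token transform wraps the matched text in its abstract-syntax class; sketched here for Integer (the class is a stand-in for the generated one):

    from lark import Transformer, v_args

    class Integer(int):                  # stand-in for the Absyn token class
        pass

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def INTEGER(self, token):
            return Integer(token.value)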
-
--- | Produces the production part of the docstring for the parsing definitions.
-prodToDocStr :: [Either Cat String] -> String
-prodToDocStr [] = ""
-prodToDocStr (ec:[]) = ecsToDocStr ec
-prodToDocStr (ec:ecs) =
- ecsToDocStr ec ++ " " ++ prodToDocStr ecs
-
-
--- Converts a single element in the production.
-ecsToDocStr :: Either Cat String -> String
-ecsToDocStr (Left (TokenCat t)) = map toUpper t
-ecsToDocStr (Left c) = map toLower (translateToList (show c))
-ecsToDocStr (Right strOp) = "\"" ++ concat (map escapeBackslash strOp) ++ "\""
-
-
--- | For single-line comments
-createLineCommentMatcher :: String -> String
-createLineCommentMatcher r = unlines
- [ " C" ++ toOrd r ++ ": /" ++ concat (map escapeChar r) ++ "[^\\n]*/"
- , " %ignore C" ++ toOrd r
- ]
-
-
--- | For multi-line comments
-createMultiLineCommentMatcher :: (String, String) -> String
-createMultiLineCommentMatcher (s, e) = unlines
- [ " C" ++ toOrd (s ++ e) ++ ": /" ++ escaped s ++ "([\\s\\S]*?)" ++
- escaped e ++ "/"
- , " %ignore C" ++ toOrd (s ++ e)
- ]
- where
- escaped s = concat $ map escapeChar s
-
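For a grammar with hypothetical C-style comments "//" and "/*" "*/", the emitted matchers would look roughly like this (47 and 42 are the character codes of '/' and '*'):

    comment_terminals = r"""
      C_47_47: /\/\/[^\n]*/
      %ignore C_47_47
      C_47_42_42_47: /\/\*([\s\S]*?)\*\//
      %ignore C_47_42_42_47
    """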
-
--- Since the grammar is embedded in a raw string, r""" ... """, we cannot
--- escape everything in strOp from regflex; only backslashes are escaped.
-escapeBackslash :: Char -> String
-escapeBackslash '\\' = "\\\\"
-escapeBackslash c = [c]
-
-
--- | Compares names for defines. The first letter is compared
--- case-insensitively, so "while" corresponds to "While".
-nameCorresponds :: String -> String -> Bool
-nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
-nameCorresponds _ _ = error "Names can't be empty"
-
-
--- Creates a transformer for a rule with its corresponding define.
-makeDefineTransform :: (Int, Rul RFun, Define) -> String
-makeDefineTransform (n, defRule, defi) = unlines
- [ " @v_args(inline=True)"
- , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return d_" ++ name ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
- , ""
- ]
- where
- name = (wpThing . defName) defi
- sentForm = rhsRule defRule
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
-
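For a hypothetical `define twice x = EAdd x x`, enumerated as rule 1, the generated transform is roughly:

    from lark import Transformer, v_args

    def d_twice(x):                      # stand-in; the real one builds EAdd(x, x)
        return ('EAdd', x, x)

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def d1_r_twice(self, x):         # scheme: d<n>_r_<lowered define name>
            return d_twice(x)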
-- | Converts the body of a define, an expression, into a production for
-- the parsing definition.
expToDef :: CF -> Exp -> String
@@ -341,7 +66,7 @@ expToDef cf (App "(:)" _ (e:[recList])) = "[" ++ expToDef cf e ++ ", " ++
expToDef _ (App "[]" _ _) = "[]"
expToDef cf (App fName _ exps)
| isLower (head fName) =
- "d_" ++ fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+ unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
| otherwise =
unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
expToDef _ (Var s) = unkw s
@@ -418,7 +143,7 @@ createDefineFunctions cf = unlines
createDefineFunction :: CF -> Define -> String
createDefineFunction cf d = unlines
- [ "def d_" ++ (wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
+ [ "def " ++ (unkw . wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
, " return " ++ expToDef cf (defBody d)
]
where
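With this change, a hypothetical `define twice x = EAdd x x` now yields a function named via unkw rather than with a d_ prefix, roughly:

    from dataclasses import dataclass

    @dataclass
    class EAdd:              # stand-in for the Absyn class
        exp_1: object
        exp_2: object

    def twice(x):            # unkw leaves 'twice' as-is; 'pass' would become 'pass_'
        return EAdd(x, x)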
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
index 352f12c1..8464bff6 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -6,20 +6,22 @@
-}
module BNFC.Backend.Python.CFtoPyPrettyPrinter ( cf2PyPretty ) where
-import Data.List ( intercalate, nub )
+import Data.List ( intercalate, nub, findIndices )
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
-import Data.Either (lefts)
+import Data.Either (rights, lefts, isLeft)
import BNFC.Backend.Common.StrUtils
import qualified Data.List.NonEmpty as List1
+
-- | Used to create PrettyPrinter.py, that contains the functionality
-- to print the AST and the linearized tree.
cf2PyPretty :: String -> CF -> String
cf2PyPretty pkgName cf = unlines
[ "from " ++ pkgName ++ ".Absyn import *"
+ , "import itertools"
, ""
, makePrintAST cf
, ""
@@ -88,7 +90,7 @@ makePrintAST cf = concat
| otherwise = ""
--- Creates deconstructors for all list categories.
+-- | Creates deconstructors for all list categories.
makeListDecons :: CF -> String
makeListDecons cf = unlines $ map (makeListDecon cf) listCats
where
@@ -97,7 +99,7 @@ makeListDecons cf = unlines $ map (makeListDecon cf) listCats
listCats = [c | c <- valCats, isList c]
--- Creates a deconstructor for some list category.
+-- | Creates a deconstructor for some list category.
makeListDecon :: CF -> Cat -> String
makeListDecon cf c = concat
[ "def list" ++ name ++ "Decon(xs):\n"
@@ -120,10 +122,11 @@ makeListDecon cf c = concat
[] -> Nothing
rs -> Just (head rs)
- -- List rules are of the form:
- -- [C] ::= symbols.. C symbols.. [C]
- -- The production, in Python, is concatenated recursively:
- -- symbols.. + c(xs[0], 'C1') + symbols.. + listCDecon(xs[1:]) + symbols..
+ noOneFun = case oneRule of
+ Nothing -> True
+ _ -> False
+
+ -- Builds the production recursively (used for the nil and one rules)
sentFormToArgs :: Int -> [Either Cat String] -> String
sentFormToArgs _ [] = "[]"
sentFormToArgs v (Right strOp:ecss) =
@@ -131,7 +134,7 @@ makeListDecon cf c = concat
sentFormToArgs v ecss
sentFormToArgs v (Left _:ecss)
| v == 0 = "c(xs[0], '" ++ name ++ "') + " ++ sentFormToArgs (v+1) ecss
- | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
+ | v == 1 = error "Python backend error: the cons case should use the iterative approach"
sentFormToArgs (v+1) ecss
| otherwise = error "A list production can max have C and [C]."
@@ -149,9 +152,37 @@ makeListDecon cf c = concat
, " return " ++ sentFormToArgs 0 (rhsRule r)
]
+ -- Adds each element and its delimiters iteratively, from the right
consRuleStr = case consRule of
Nothing -> ""
- Just r -> " return " ++ sentFormToArgs 0 (rhsRule r) ++ "\n"
+ Just r -> unlines
+ [ " " ++ start
+ , " for x in xs[:" ++ endIndice ++ "][::-1]:"
+ , " tot += " ++ add endlims ++ "[]"
+ , " tot = " ++ add delims ++ "tot"
+ , " tot = c(x, '" ++ name ++ "') + tot"
+ , " tot = " ++ add prelims ++ "tot"
+ , " return tot"
+ ]
+ where
+ ecss = rhsRule r
+ indices = findIndices isLeft ecss
+ i1 = head indices
+ i2 = last indices
+ prelims = rights $ take i1 ecss
+ endlims = rights $ drop i2 ecss
+ delims = rights $ drop i1 $ take i2 ecss
+
+ start
+ | not noOneFun = "tot = list" ++ name ++ "Decon(xs[-1:])"
+ | otherwise = "tot = list" ++ name ++ "Decon([])"
+
+ add :: [String] -> String
+ add ss = concat $ map (\s -> "['" ++ escapeChars s ++ "'] + ") ss
+
+ endIndex
+ | not noOneFun = "-1"
+ | otherwise = ""
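The emitted deconstructor thus folds the elements in from the right instead of recursing once per element. A self-contained sketch for a hypothetical [Exp] with separator "," (c, the generated per-element linearizer, is stubbed; prelim and terminator handling are omitted):

    def c(x, cat):                       # stub for the generated linearizer
        return [str(x)]

    def listExpDecon(xs):
        if not xs:
            return []
        if len(xs) == 1:                 # the one-rule renders the last element
            return c(xs[0], 'Exp')
        tot = listExpDecon(xs[-1:])      # start from the final element
        for x in xs[:-1][::-1]:          # remaining elements, right to left
            tot = [','] + tot            # the "," delimiter
            tot = c(x, 'Exp') + tot      # the element itself
        return tot

    print(listExpDecon([1, 2, 3]))       # ['1', ',', '2', ',', '3']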
-- | Creates the renderC function, which creates a string of a list of
@@ -286,12 +317,12 @@ makeLinFunc cf = unlines
, " return [ast]"
]
]
- , " # skeleTokenCases:"
- , unlines skeleTokenCases
- , " # skeleRuleCases:"
- , unlines skeleRuleCases
- , -- Deals with cases where the entrypoint is say [Stm] or [Exp],
- -- with pattern matching on the first object in the list.
+ , " # Token cases:"
+ , unlines tokenCases
+ , " # Rule cases:"
+ , unlines ruleCases
+ , -- Deals with cases where the entrypoint is, say, [Stm] or
+ -- [Exp], with pattern matching on the first object in the list.
" case " ++ "list():"
, " if len(ast) == 0:"
, " return []"
@@ -305,7 +336,7 @@ makeLinFunc cf = unlines
, " raise Exception(str(ast.__class__) + ' unmatched')"
]
where
- -- Used to include standard literals, if needed.
+ -- To include standard literals, if needed.
ifUsedThen :: TokenCat -> [String] -> String
ifUsedThen cat ss
| isUsedCat cf (TokenCat cat) = unlines ss
@@ -319,8 +350,8 @@ makeLinFunc cf = unlines
, not (isNilCons r)
]
- skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
- skeleRuleCases = map makeSkeleRuleCase rules
+ tokenCases = map makeTokenCase (tokenNames cf)
+ ruleCases = map makeRuleCase rules
catEntrypointsForLists =
[catOfList c | c <- (List1.toList . allEntryPoints) cf, isList c]
@@ -356,9 +387,9 @@ makeListEntrypointCase cf c = concat
]
--- Creates a case for a user defined literal, which inherits str.
-makeSkeleTokenCase :: String -> String
-makeSkeleTokenCase tokenName = concat
+-- | Creates a case for a user-defined literal, which inherits from str.
+makeTokenCase :: String -> String
+makeTokenCase tokenName = concat
[ " case " ++ unkw tokenName ++ "():\n"
, " return [ast]"
]
@@ -366,12 +397,11 @@ makeSkeleTokenCase tokenName = concat
-- | Creates a case for some rule, with a comment recording which
-- separator and terminator delimiters occur in it.
-makeSkeleRuleCase :: Rul RFun -> String
-makeSkeleRuleCase rule = concat
+makeRuleCase :: Rul RFun -> String
+makeRuleCase rule = concat
[ " case " ++ unkw fName ++ "(" ++ varNamesCommad ++ "):\n"
, " # " ++ (showEcss sentForm) ++ "\n"
- , " return " ++ if (length args > 0)
- then (intercalate " + " args)
+ , " return " ++ if (length args > 0) then (intercalate " + " args)
else "[]"
]
where
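Together these produce the match cases of the linearizer. A self-contained sketch for a hypothetical token Ident and rule `EVar. Exp ::= Ident` (requires Python 3.10):

    from dataclasses import dataclass

    class Ident(str):                    # token classes inherit from str
        pass

    @dataclass
    class EVar:                          # stand-in for the Absyn class
        ident_: Ident

    def lin(ast):
        match ast:
            case Ident():                # token case
                return [ast]
            case EVar(ident_1):
                # Ident
                return lin(ident_1)
        raise Exception(str(ast.__class__) + ' unmatched')

    print(lin(EVar(Ident('x'))))         # ['x']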
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
index f68abe13..a1258e85 100644
--- a/source/src/BNFC/Backend/Python/PyHelpers.hs
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -87,21 +87,19 @@ showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
-- | Adds an underscore if the string overlaps with a keyword.
unkw :: String -> String
-unkw s = if s `elem` kwListWithSoftKeywords then s ++ "_" else s
+unkw s = if s `elem` pythonReserved then s ++ "_" else s
--- To add an extra underscore if something overlaps with a keyword.
-kwListWithSoftKeywords :: [String]
-kwListWithSoftKeywords =
- [ "False"
- , "None"
- , "True"
- , "and"
+-- | Python keywords plus soft keywords, in alphabetical order.
+pythonReserved :: [String]
+pythonReserved =
+ [ "and"
, "as"
, "assert"
, "async"
, "await"
, "break"
+ , "case"
, "class"
, "continue"
, "def"
@@ -109,6 +107,7 @@ kwListWithSoftKeywords =
, "elif"
, "else"
, "except"
+ , "False"
, "finally"
, "for"
, "from"
@@ -118,19 +117,19 @@ kwListWithSoftKeywords =
, "in"
, "is"
, "lambda"
+ , "match"
+ , "None"
, "nonlocal"
, "not"
, "or"
, "pass"
, "raise"
, "return"
+ , "True"
, "try"
+ , "type"
, "while"
, "with"
, "yield"
, "_"
- , "case"
- , "match"
- , "type"
]
-
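The practical effect on generated names, for hypothetical grammar labels:

    # unkw "EAdd"  == "EAdd"     (no collision, unchanged)
    # unkw "match" == "match_"   (soft keyword since Python 3.10)
    class match_:                 # what Absyn.py would contain for a label "match"
        pass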
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index 8231c8eb..c5876296 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -450,7 +450,7 @@ parameters = concat
}
pythonParams = base
{ tpBuild = do
- return () -- nothing to make or compile
+ tpMake
,
tpRunTestProg = \ _lang args -> do
pyFile_ <- findFile "genTest.py"