From bdcc8f7cf380b8539273e6a54ba78258b606bade Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 14:54:29 +0200
Subject: [PATCH 1/8] Added Python backend for use with PLY
---
docs/user_guide.rst | 87 ++++
document/BNF_Converter_Python_Mode.html | 218 ++++++++++
source/BNFC.cabal | 9 +
source/main/Main.hs | 3 +
source/src/BNFC/Backend/Python.hs | 149 +++++++
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 382 ++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPyLex.hs | 189 ++++++++
.../Backend/Python/CFtoPyPrettyPrinter.hs | 409 ++++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPySkele.hs | 114 +++++
source/src/BNFC/Backend/Python/PyHelpers.hs | 80 ++++
source/src/BNFC/Backend/Python/RegToFlex.hs | 97 +++++
source/src/BNFC/Options.hs | 7 +
testing/src/ParameterizedTests.hs | 12 +
13 files changed, 1756 insertions(+)
create mode 100644 document/BNF_Converter_Python_Mode.html
create mode 100644 source/src/BNFC/Backend/Python.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyAbs.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoPySkele.hs
create mode 100644 source/src/BNFC/Backend/Python/PyHelpers.hs
create mode 100644 source/src/BNFC/Backend/Python/RegToFlex.hs
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 041c508a..8273f1d5 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -284,3 +284,90 @@ BNFC adds the grammar name as a file extension. So if the grammar file is
named ``Calc.cf``, the lexer will be associated to the file extension
``.calc``. To associate other file extensions to a generated lexer, you need to
modify (or subclass) the lexer.
+
+Python Backend
+===============
+
+The BNF Converter's Python Backend generates a Python frontend that uses
+`PLY <https://www.dabeaz.com/ply/ply.html>`_ (Python Lex Yacc) to parse
+input into an AST (abstract syntax tree).
+
+Python 3.10 or higher is needed.
+
+Example usage: ::
+
+ bnfc --python Calc.cf
+
+
+.. list-table:: The result is a set of files:
+ :widths: 25 25
+ :header-rows: 1
+
+ * - Filename
+ - Description
+ * - bnfcPyGenCalc/Absyn.py
+ - Provides the classes for the abstract syntax.
+ * - bnfcPyGenCalc/LexTokens.py
+ - Provides PLY with the information needed to build the lexer.
+   * - bnfcPyGenCalc/ParsingDefs.py
+ - Provides PLY with the information needed to build the parser.
+ * - bnfcPyGenCalc/PrettyPrinter.py
+ - Provides printing for both the AST and the linearized tree.
+ * - genTest.py
+     - A ready-made test file that uses the generated frontend to convert input into an AST.
+ * - skele.py
+ - Provides skeleton code to deconstruct an AST, using structural pattern matching.
+
+Optionally one may with ``-m``` also create a makefile that contains the target
+"distclean" to remove the generated files.
+
+Testing the frontend
+....................
+
+It's possible to pipe input, like::
+
+ echo "(1 + 2) * 3" | python3.10 genTest.py
+
+or::
+
+ python3.10 genTest.py < file.txt
+
+and it's possible to just use an argument::
+
+ python3.10 genTest.py file.txt
+
+
+Caveats
+.......
+
+Presentation of conflicts in a grammar:
+
+  A symbol-to-unicode transformation is made for the terminals in the grammar,
+  for example from "++" to "S_43_43". This, however, obfuscates the grammar
+  information that PLY generates in the "parser.out" file. Users are hence
+  encouraged to use the Haskell backend to debug grammars and identify
+  conflicts.
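+
+  For reference, the mangled names can be reproduced with a few lines of
+  Python (a sketch mirroring the backend's internal ``toOrd`` helper, where
+  symbols get an "S" prefix and reserved words an "R" prefix)::
+
+    def to_ord(s: str) -> str:
+        return ''.join('_' + str(ord(c)) for c in s)
+
+    print('S' + to_ord('++'))   # S_43_43
+    print('R' + to_ord('int'))  # R_105_110_116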
+
+Several entrypoints:
+
+  At the top of the ParsingDefs.py file an additional rule is added that has
+  every defined entrypoint as a possible production. This may create conflict
+  warnings, as it may introduce ambiguity. The added parsing rule is therefore
+  removed by default with the statement "del p__Start" directly beneath the
+  function, and is enabled again if the user comments out the removal, as
+  shown in the sketch below.
+
+Special cases for special characters:
+
+  Using non-special characters, instead of say parentheses, when defining
+  rules may not yield the expected behaviour. With the rule below, an
+  expression such as "a1+2a" cannot be parsed, since the a's are classified
+  as reserved keywords, like "int", rather than as symbols, like "+"::
+
+ _. Exp1 ::= "a" Exp "a" ;
+
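+  The keyword matching happens inside the generated rule for identifiers,
+  roughly as follows (a sketch of the generated LexTokens.py)::
+
+    def t_Ident(t):
+        r'[A-Za-z]\w*'
+        t.type = reserved_map.get(t.value, 'Ident')
+        return t
+
+  Since "a1" matches the identifier pattern as a whole, the lexer never
+  produces the keyword "a" followed by "1".
+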
+Results from the parameterized tests:
+
+  While the Python backend generates working frontends for the example
+  grammars, the regression tests report four "failures" and six "errors".
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
new file mode 100644
index 00000000..429b5c44
--- /dev/null
+++ b/document/BNF_Converter_Python_Mode.html
@@ -0,0 +1,218 @@
+
+
+
+ BNF Converter Python Mode
+
+
+
+
+
+BNF Converter
+Python Mode
+
+By Björn Werner
+
+2024
+
+  The BNF Converter's Python Backend generates a Python frontend that uses
+  PLY (Python Lex Yacc) to parse input into an AST (abstract syntax tree).
+
+
+ BNFC on Github:
+ https://github.com/BNFC/bnfc
+
+
+ PLY homepage:
+ https://www.dabeaz.com/ply/ply.html
+
+
+ Python 3.10 or higher is needed.
+
+Usage
+
+ bnfc --python NAME.cf
+
+
+The result is a set of files:
+
+
+
+ Filename: | Description: |
+
+
+  bnfcPyGenNAME/LexTokens.py | Provides PLY with the information needed to build the lexer. |
+
+
+  bnfcPyGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+
+
+  bnfcPyGenNAME/ParsingDefs.py | Provides PLY with the information needed to build the parser. |
+
+
+  bnfcPyGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
+
+
+  genTest.py | A ready-made test file that uses the generated frontend to convert input into an AST. |
+
+
+ skele.py | Provides skeleton code to deconstruct an AST, using structural pattern matching. |
+
+
+
+Testing the frontend
+
+ The following example uses a frontend that is generated from a C-like grammar.
+
+
+ $ python3.10 genTest.py < hello.c
+
+
+ Generating LALR tables
+ Parse Successful!
+
+ [Abstract Syntax]
+ (PDefs [(DFun Type_int "main" [] [(SExp (EApp "printString" [(EString "Hello world")])), (SReturn (EInt 0))])])
+
+ [Linearized Tree]
+ int main ()
+ {
+ printString ("Hello world");
+ return 0;
+ }
+
+
+ The LALR tables are cached in a file called "parsetab.py", and a description by PLY of the grammar is stored in a file called "parser.out".
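+
+  The generated modules can also be used directly, without genTest.py (a
+  sketch, assuming a grammar named NAME with an entrypoint Exp):
+
+
+from ply.lex import lex
+from ply.yacc import yacc
+from bnfcPyGenNAME.LexTokens import *
+from bnfcPyGenNAME.ParsingDefs import *
+from bnfcPyGenNAME.PrettyPrinter import *
+
+lexer = lex.lex()
+parser = yacc(start='Exp')  # pick one of the grammar's entrypoints
+ast = parser.parse('1 + 2', lexer=lexer)
+print(printAST(ast))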
+
+The Abstract Syntax Tree
+
+  The AST is built up from instances of Python classes that use the dataclass decorator, such as:
+
+
+@dataclass
+class EAdd:
+ exp_1: Exp
+ exp_2: Exp
+ _ann_type: _AnnType = field(default_factory=_AnnType)
+
+
+ The "_ann_type" variable is a placeholder that can be used to store useful information,
+ for example type-information in order to create a type-annotated AST.
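+
+  For example, a type checker could annotate a node as follows (a sketch;
+  Type_int is one of the classes generated from the C-like grammar above):
+
+
+node._ann_type.s(Type_int())  # set; raises if a different value is already set
+print(node._ann_type.g())     # read the annotation back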
+
+Using the skeleton file
+
+  The skeleton file serves as a template, for example for creating an
+  interpreter. Two different types of matchers are generated: the first with
+  all the value categories together, and a second type where each matcher
+  covers only one individual value category, as in the example below:
+
+
+def matcherExp(exp_: Exp):
+ match exp_:
+ case EAdd(exp_1, exp_2, _ann_type):
+ # Exp "+" Exp1
+ raise Exception('EAdd not implemented')
+ case ESub(exp_1, exp_2, _ann_type):
+ ...
+
+
+  This can be modified, in order to return the sum of each evaluated
+  argument, into:
+
+
+ def matcherExp(exp_: Exp):
+ match exp_:
+ case EAdd(exp_1, exp_2, _ann_type):
+ # Exp "+" Exp1
+ return matcherExp(exp_1) + matcherExp(exp_2)
+ case ESub(exp_1, exp_2, _ann_type):
+ ...
+
+
+ The function can now be imported and used in the generated test file
+ (similarly to how the pretty printer is imported and used):
+
+
+ from skele import matcherExp
+ ...
+ print(matcherExp(ast))
+
+
+Known issues
+
+ Presentation of conflicts in a grammar:
+
+
+  A symbol-to-unicode transformation is made for the terminals in the grammar,
+  for example from "++" to "S_43_43". This, however, obfuscates the grammar
+  information that PLY generates inside the "parser.out" file. Users are hence
+  encouraged to use, say, the Haskell backend to debug their
+  grammars and identify conflicts.
+
+
+ Several entrypoints:
+
+
+  At the top of the ParsingDefs.py file an additional rule is added that has
+  every defined entrypoint as a possible production. This may create warnings
+  for conflicts if it introduces ambiguity, and warnings for unused rules if
+  the "_Start" category is not used as the entrypoint. The added parsing rule
+  is therefore removed by default with "del p__Start" beneath the function,
+  and is enabled again if the user comments out the removal.
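+
+  For a grammar with the entrypoints Exp and Stm, the added rule looks
+  roughly like this (a sketch; Exp and Stm stand in for the actual
+  entrypoints):
+
+
+def p__Start(p):
+    '''
+    _Start : Exp
+           | Stm
+    '''
+    p[0] = p[1]
+
+del p__Start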
+
+
+ Skeleton code for using lists as entrypoints:
+
+
+  Matchers for list categories, such as [Exp], are not generated in the
+  skeleton code, as they may confuse users if the grammar uses several
+  different list categories. Users are instead encouraged to use a non-list
+  entrypoint.
+
+
+ The improper way to iterate over lists, as the value category is unknown:
+
+
+ case list():
+ for ele in ast:
+ ...
+
+
+ The proper way to deconstruct lists, where we know the value category:
+
+
+ case RuleName(listexp_):
+ for exp in listexp_:
+ ...
+
+
+ Special cases for special characters
+
+
+  Using non-special characters instead of, say, parentheses when defining rules may not yield the expected
+  behaviour. With the rule below, an expression such as "a1+2a" cannot be parsed.
+
+
+ _. Exp1 ::= "a" Exp "a" ;
+
+
+ Using multiple separators
+
+
+ Using multiple separators for the same category, such as below, generates
+ Python functions with overlapping names, causing runtime errors.
+
+
+ separator Exp1 "," ;
+ separator Exp1 ";" ;
+
\ No newline at end of file
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index 7300a8d2..c232b401 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -280,6 +280,15 @@ library
BNFC.Backend.TreeSitter.CFtoTreeSitter
BNFC.Backend.TreeSitter.RegToJSReg
+ -- Python backend
+ BNFC.Backend.Python
+ BNFC.Backend.Python.CFtoPyAbs
+ BNFC.Backend.Python.CFtoPyLex
+ BNFC.Backend.Python.CFtoPyPrettyPrinter
+ BNFC.Backend.Python.RegToFlex
+ BNFC.Backend.Python.PyHelpers
+ BNFC.Backend.Python.CFtoPySkele
+
----- Testing --------------------------------------------------------------
test-suite unit-tests
diff --git a/source/main/Main.hs b/source/main/Main.hs
index 754bf268..6377611f 100644
--- a/source/main/Main.hs
+++ b/source/main/Main.hs
@@ -26,6 +26,7 @@ import BNFC.Backend.Latex
import BNFC.Backend.OCaml
import BNFC.Backend.Pygments
import BNFC.Backend.TreeSitter
+import BNFC.Backend.Python
import BNFC.CF (CF)
import BNFC.GetCF
import BNFC.Options hiding (make, Backend)
@@ -83,3 +84,5 @@ maketarget = \case
TargetPygments -> makePygments
TargetCheck -> error "impossible"
TargetTreeSitter -> makeTreeSitter
+ TargetPython -> makePython
+
\ No newline at end of file
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
new file mode 100644
index 00000000..e0a3da35
--- /dev/null
+++ b/source/src/BNFC/Backend/Python.hs
@@ -0,0 +1,149 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-
+ BNF Converter: Python main file
+ Copyright (C) 2004 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python (makePython) where
+
+import Prelude hiding ((<>))
+import System.FilePath ((</>))
+import BNFC.CF (CF, firstEntry)
+import BNFC.Options (SharedOptions, optMake, lang)
+import BNFC.Backend.Base (MkFiles, mkfile)
+import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs)
+import BNFC.Backend.Python.CFtoPyLex (cf2PyLex)
+import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
+import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
+import BNFC.Backend.Python.PyHelpers
+
+import BNFC.PrettyPrint -- For Doc
+import qualified BNFC.Backend.Common.Makefile as Makefile
+
+-- | Entrypoint for BNFC to use the Python backend.
+makePython :: SharedOptions -> CF -> MkFiles ()
+makePython opts cf = do
+ let pkgName = "bnfcPyGen" ++ name
+ let (lexerDefs, tokensPly) = cf2PyLex cf
+ let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly
+ let prettyPrinter = cf2PyPretty pkgName cf
+ let skeletonCode = cf2PySkele pkgName cf
+ mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs
+ mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
+ mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
+ mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
+ mkPyFile "skele.py" skeletonCode
+ mkPyFile "genTest.py" (pyTest pkgName cf)
+ Makefile.mkMakefile (optMake opts) $ makefile pkgName (optMake opts)
+ where
+ name :: String
+ name = lang opts
+ mkPyFile x = mkfile x comment
+
+
+-- | A makefile with distclean and clean specifically for the testsuite. No
+-- "all" is needed as bnfc has already generated the necessary Python files.
+makefile :: String -> Maybe String -> String -> Doc
+makefile pkgName optMakefileName basename = vcat
+ [
+ Makefile.mkRule "all" []
+ [ " " ]
+ , Makefile.mkRule "clean" []
+ [ "rm -f parser.out parsetab.py" ]
+ , Makefile.mkRule "distclean" [ "vclean" ] []
+ , Makefile.mkRule "vclean" []
+ [ "rm -f " ++ unwords
+ [
+ pkgName ++ "/LexTokens.py",
+ pkgName ++ "/ParsingDefs.py",
+ pkgName ++ "/Absyn.py",
+ pkgName ++ "/PrettyPrinter.py",
+ pkgName ++ "/LexTokens.py.bak",
+ pkgName ++ "/ParsingDefs.py.bak",
+ pkgName ++ "/Absyn.py.bak",
+ pkgName ++ "/PrettyPrinter.py.bak",
+ "skele.py",
+ "genTest.py",
+ "skele.py.bak",
+ "genTest.py.bak"
+ ],
+ "rm -f " ++ pkgName ++ "/__pycache__/*.pyc",
+ "rm -fd " ++ pkgName ++ "/__pycache__",
+ "rmdir " ++ pkgName,
+ "rm -f __pycache__/*.pyc",
+ "rm -fd __pycache__",
+ "rm -f parser.out parsetab.py",
+ "rm -f " ++ makefileName,
+ "rm -f " ++ makefileName ++ ".bak"
+ ]
+ ]
+ where
+ makefileName = case optMakefileName of
+ Just s -> s
+ Nothing -> "None" -- No makefile will be created.
+
+
+-- | Put string into a comment.
+comment :: String -> String
+comment x = "# " ++ x
+
+
+-- Produces the content for the testing file, genTest.py.
+pyTest :: String -> CF -> String
+pyTest pkgName cf = unlines
+ [
+ "from ply.lex import lex",
+ "from ply.yacc import yacc",
+ "import sys",
+ "from " ++ pkgName ++ ".LexTokens import *",
+ "from " ++ pkgName ++ ".ParsingDefs import *",
+ "from " ++ pkgName ++ ".PrettyPrinter import *",
+ "",
+ "",
+ "# Suggested input options:",
+ "# python3.10 genTest.py < sourcefile",
+ "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).",
+ "inputFile = None",
+ "if len(sys.argv) > 1:",
+ "\tf = open(sys.argv[1], 'r')",
+ "\tinp = f.read()",
+ "\tf.close()",
+ "\tif len(sys.argv) > 2:",
+ "\t\tinputFile = sys.argv[2]",
+ "else:",
+ "\tinp = ''",
+ "\tfor line in sys.stdin:",
+ "\t\tinp += line",
+ "",
+ "",
+ "# Customizable error handling for the parsing",
+ "def p_error(p: lex.LexToken):",
+ "\tif p is None:",
+ "\t\tprint('No rule could reduce the tokenized input')",
+ "\telse:",
+ "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')",
+ "\t\tp.lexer.syntaxError = True",
+ "",
+ "",
+ "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
+ "lexer = lex.lex()",
+ "parser = yacc(start=" ++ defaultEntry ++ ")",
+ "lexer.syntaxError = False",
+ "ast = parser.parse(inp, lexer=lexer)",
+ "if ast and not lexer.syntaxError:",
+ "\tprint('Parse Successful!\\n')",
+ "\tprint('[Abstract Syntax]')",
+ "\tprint(printAST(ast))",
+ "\tprint('\\n[Linearized Tree]')",
+ "\tlinTree = lin(ast)",
+ "\tprint(renderC(linTree))",
+ "\tprint()",
+ "else:",
+ "\tprint('Parse failed')",
+ "\tquit(1)"
+ ]
+ where
+ defaultEntry = (addCitationSigns . translateToList . show . firstEntry) cf
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
new file mode 100644
index 00000000..a4712fb0
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -0,0 +1,382 @@
+
+{-
+ BNF Converter: Python abstract syntax and parsing definitions generator
+ Copyright (C) 2024 Author: Bjorn Werner
+ Based on CFtoCAbs.hs, Copyright (C) 2004 Michael Pellauer
+-}
+
+module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
+import Data.List ( nub, intercalate )
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+import Data.Char (toLower)
+import qualified Data.List.NonEmpty as List1
+
+-- | The result is ParsingDefs.py & Absyn.py
+cf2PyAbs
+ :: String
+ -> CF -- ^ Grammar.
+ -> [(String, String)] -- Tokens to unicode mapping
+ -> (String, String) -- ParsingDefs.py, Absyn.py.
+cf2PyAbs pkgName cf tokensPly = ( unlines
+ [ "from " ++ pkgName ++ ".Absyn import *"
+ , "\n\n" ++ createCommonEntrypointDef cf
+ , "\n\n" ++ (unlines parsingDefs)
+ , if length definesParsingDefs > 0
+ then "\n\n# Parsing rules from defines"
+ else ""
+ , "\n\n" ++ unlines definesParsingDefs
+ ]
+ , "from typing import List as _List" ++
+ "\n\n# Value categories (no coercsions):" ++
+ "\n\n" ++ unlines valueCatsClasses ++
+ "\n\n" ++ placeholderVariableClass ++
+ "\n\n# Rules:" ++
+ "\n" ++ "from dataclasses import dataclass, field" ++
+ "\n\n" ++ (unlines dataClasses)
+ )
+ where
+ rules = cfgRules cf
+
+ -- To create ParsingDefs.py
+ parsingDefs :: [String]
+ parsingDefs = map (ruleToParsingDef cf tokensPly)
+ [r | r <- rules, isParsable r, not (isDefinedRule r)]
+
+ definesParsingDefs = makeDefineParsingDefs cf tokensPly
+
+ -- To create Absyn.py
+ dataClasses :: [String]
+ dataClasses = map makePythonClass
+ [ r | r <- rules, not (isDefinedRule r)
+ , not (isNilCons r)
+ , not (isCoercion r)
+ ]
+
+ rulesNoListConstructors =
+ [r | r <- (cfgRules cf), not (isNilCons r), not (isCoercion r) ]
+
+ -- Note: Custom tokens are set to inherit "str".
+ valueCatNames = nub $
+ (map (show . normCat . valCat) rulesNoListConstructors) ++
+ (map (++"(str)") (tokenNames cf)) ++
+ [ "String(str)"
+ , "Char(str)"
+ , "Ident(str)"
+ , "Integer(int)"
+ , "Double(float)"
+ ]
+
+ valueCatsClasses = map createValueCatClass valueCatNames
+
+
+placeholderVariableClass :: String
+placeholderVariableClass = unlines
+ [ "# Placeholder to add additional information to a node in the AST," ++
+ " like type information."
+ , "class _AnnType:"
+ , " def __init__(self):"
+ , " self.__v = None"
+ , ""
+ , " def s(self, val):"
+ , " if not self.__v == None:"
+ , " if self.__v != val:"
+ , " raise Exception('already has type: ' + str(self.__v)" ++
+ " + ' and tried to set to ' + str(val))"
+ , " self.__v = val"
+ , ""
+ , " def g(self):"
+ , " return self.__v"
+ , ""
+ , " def __str__(self):"
+ , " return str(self.__v.__class__)"
+ , ""
+ , " def __repr__(self):"
+ , " return str(self.__v.__class__)"
+ ]
+
+-- | Creates a parsing definition that points to all entrypoints.
+createCommonEntrypointDef :: CF -> String
+createCommonEntrypointDef cf = unlines
+ [ "def p__Start(p):"
+ , " '''"
+ , " _Start : " ++ (translateToList . show . head) cats ++
+ concat (map createCase (tail cats))
+ , " '''"
+ , " p[0] = p[1]"
+ , ""
+ , ""
+ , "# Comment the below line to enable the '_Start' entrypoint (may yield"
+ ++ " conflict warnings)."
+ , "del p__Start"
+ , ""
+ ]
+ where
+ cats = (List1.toList . allEntryPoints) cf
+
+ createCase :: Cat -> String
+ createCase c = "\n | " ++ translateToList (show c)
+
+
+-- | The value categories become abstract classes, for type hinting.
+createValueCatClass :: String -> String
+createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
+
+
+-- | Creates a parsing definition, by checking what type of rule it is and
+-- calling the corresponding make function.
+ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String
+ruleToParsingDef cf tokensPly rule
+ | isCoercion funcRStr =
+ makeParseCoercion cf tokensPly funcCat (fName, sentForm)
+ | isNilFun funcRStr =
+ makeParseNil tokensPly funcCat (fNameTranslated, sentForm)
+ | isOneFun funcRStr =
+ makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm)
+ | isConsFun funcRStr =
+ makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm)
+ | isDefinedRule rule =
+ error "Should not generate define rules in this step"
+ | otherwise =
+ makeParseFunc cf tokensPly funcCat (fName, sentForm)
+ where
+ funcRStr = funRule rule :: RString
+ fName = wpThing funcRStr :: String
+
+ funcCat = valCat rule :: Cat
+ catStr = show (valCat rule) :: String
+
+ fNameTranslated :: String
+ fNameTranslated
+ | isNilFun funcRStr = catStr
+ | otherwise = fName
+
+ sentForm = rhsRule rule :: [Either Cat String]
+
+
+-- | Make a Python class from a rule's name and production.
+makePythonClass :: Rul RFun -> String
+makePythonClass rule =
+ "@dataclass\n" ++
+ "class " ++ name ++ ":\n" ++
+ if length cats == 0 then "\tpass\n" else classBody
+ where
+ name = funName rule
+ sentForm = rhsRule rule
+ cats = lefts sentForm
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarsWithType = [render d ++ ": " ++
+ strCatToPyTyping (show (normCat c)) | (c, d) <- lefts nvCats]
+
+ classBody = unlines $ map ("\t" ++) (enumeratedVarsWithType ++
+ ["_ann_type: _AnnType = field(default_factory=_AnnType)"])
+
+
+
+-- | Creates the corresponding type hinting for some member variable.
+strCatToPyTyping :: String -> String
+strCatToPyTyping s =
+ if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
+
+
+-- | Maps a literal or token category to its Python class; Nothing for
+-- other categories, such as list categories.
+literalsToPytypeMaybe :: CF -> String -> Maybe String
+literalsToPytypeMaybe cf s = case s of
+ "Integer" -> Just "Integer"
+ "Double" -> Just "Double"
+ "Char" -> Just "Char"
+ "String" -> Just "String"
+ "Ident" -> Just "Ident"
+ _ -> if s `elem` (tokenNames cf) then Just s else Nothing
+
+
+-- | The following makeParse functions create their corresponding parsing
+-- definitions for some rule.
+makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines
+ [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")"
+
+
+makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ strP ++ "\n"
+ ]
+ where
+ strP = head (getLeftIndexes cf 1 sentForm)
+ sourceCat = (head . lefts) sentForm
+
+
+makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String
+makeParseNil tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = []\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+
+
+makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String
+makeParseOne cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\""
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+ rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
+
+
+makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm)
+ -> String
+makeParseCons cf tokensPly dataCat (_, sentForm) = unlines
+ [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t" ++ "\"\"\"" ++ "\n"
+ , "\t" ++ "p[0] = " ++ rhs ++ "\n"
+ ]
+ where
+ translatedCat = translateToList $ show dataCat
+ rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
+
+
+-- | Produces a list of the elements in the code production, where the indices
+-- match the argument categories.
+getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String]
+ -> [String]
+getLeftIndexesLists _ _ _ [] = []
+getLeftIndexesLists tokensPly cf n (Left c:ecs)
+ | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs)
+ | otherwise = ["[" ++ typedPTerm ++ "]"] ++
+ (getLeftIndexesLists tokensPly cf (n+1) ecs)
+ where
+ pTerm = "p[" ++ (show n) ++ "]"
+ typedPTerm = case literalsToPytypeMaybe cf (show c) of
+ Just s -> s ++ "(" ++ pTerm ++ ")"
+ Nothing -> pTerm
+getLeftIndexesLists tokensPly cf n (Right strOp:ecs)
+ | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs
+ | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs
+
+
+-- | In case the delimiter is "" or is not defined for the lexer, like
+-- ignored characters.
+separatorIsEmpty :: [(String, String)] -> String -> Bool
+separatorIsEmpty tokensPly strOp
+ | length strOp > 0 = case lookup strOp tokensPly of
+ Just _ -> False
+ Nothing -> True
+ | otherwise = True
+
+
+-- | Produces a list of the elements in the code production, where the indices
+-- match the argument categories.
+getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String]
+getLeftIndexes _ _ [] = []
+getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++
+ (getLeftIndexes cf (n+1) ecs)
+ where
+ pTerm = "p[" ++ (show n) ++ "]"
+ typedPTerm = case literalsToPytypeMaybe cf (show c) of
+ Just s -> s ++ "(" ++ pTerm ++ ")"
+ Nothing -> pTerm
+getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs
+
+
+-- | Produces the production in the docstring for the parsing definitions.
+prodToDocStr :: [(String, String)] -> [Either Cat String] -> String
+prodToDocStr _ [] = ""
+prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec
+prodToDocStr tokensPly (ec:ecs) =
+ ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs
+
+
+-- Converts a single element in the production.
+ecsToDocStr :: [(String, String)] -> Either Cat String -> String
+ecsToDocStr _ (Left c) = translateToList $ show c
+ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of
+ (Just s) -> s
+ Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes
+
+
+-- | Creating the parsing definitions for the defines.
+makeDefineParsingDefs :: CF -> [(String, String)] -> [String]
+makeDefineParsingDefs cf tokensPly = defFuncsPy
+ where
+ rules = cfgRules cf
+
+ definedRules :: [Rul RFun]
+ definedRules = [r | r <- rules, isDefinedRule r]
+
+ pairs :: [(Rul RFun, Define)]
+ pairs = [(dr, d) | dr <- definedRules, d <- definitions cf,
+ nameCorresponds ((wpThing . defName) d) (funName dr)]
+
+ -- Adds a number to the name to make each define separate.
+ numberedPairs = zip [1..] pairs
+ defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs
+
+
+-- | To compare names for defines. The first letter needs to be lowered, so
+-- "while" == "While".
+nameCorresponds :: String -> String -> Bool
+nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
+nameCorresponds _ _ = error "Names can't be empty"
+
+
+-- | Creates a define parsing definition.
+makeDefineParsingDef ::
+ CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String
+makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines
+ [ "def p_D" ++ (show n) ++ name ++ "(p):"
+ , "\t\"\"\""
+ , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
+ , "\t\"\"\""
+ , "\t# " ++ show env
+ , "\tp[0] = " ++ expToDef env (defBody defi)
+ , ""
+ ]
+ where
+ name = (wpThing . defName) defi
+ translatedCat = translateToList $ (catToStr . valCat) defRule
+ sentForm = rhsRule defRule
+ indexes = getLeftIndexes cf 1 sentForm
+ args = map fst (defArgs defi)
+ env = zip args indexes
+
+
+-- | Converts the production of a define, called an expression, to a
+-- production for the parsing definition.
+expToDef :: [(String, String)] -> Exp -> String
+expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
+expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
+ expToDef env recList
+expToDef _ (App "[]" _ _) = "[]"
+expToDef env (App fName _ exps) =
+ fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
+expToDef env (Var s) = case lookup s env of
+ Just p -> p
+ Nothing -> error "Missing variable in define enviroment"
+expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
+expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
+expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
+expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
new file mode 100644
index 00000000..fd1a532a
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
@@ -0,0 +1,189 @@
+
+{-
+ BNF Converter: Python lexer generator
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where
+
+import BNFC.CF
+
+import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
+import BNFC.Backend.Python.PyHelpers
+
+
+-- | The entrypoint, returns LexTokens.py and the unicode mapping.
+cf2PyLex :: CF -> (String, [(String, String)])
+cf2PyLex cf = (, tokensPly) $ unlines
+ [ "import ply.lex as lex\n"
+ , ""
+ , createReservedMap reservedWordsEnv
+ , "# PLY tokens:\n" ++ plyTokens ++ "\n"
+ , "# PLY tokens with RegEx:"
+ , unlines plyTokensRegEx
+ , "# Literals:"
+ , plyLiterals cf
+ , "# Comments:"
+ , unlines singleComments
+ , unlines multiComments
+ , footer
+ ]
+ where
+ -- The reserved keywords and the symbols are zipped with a
+ -- unicode representation, which is needed for the parsing.
+
+ -- Reserved keywords -> [("int", "R_...")]
+ reservedWordsVar :: [String]
+ reservedWordsVar = reservedWords cf
+
+ reservedWordsEnv :: [(String, String)]
+ reservedWordsEnv =
+ zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar)
+
+ -- Symbols -> [("+", "S_43")]
+ literalsVar :: [String]
+ literalsVar = literals cf
+
+ strOps :: [String]
+ strOps = map fst (cfTokens cf)
+
+ strOpsFiltered = filterOut strOps reservedWordsVar
+ strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered
+
+ strOpsAndSymbols :: [(String, String)]
+ strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols
+
+ presentSymbols :: [String]
+ presentSymbols =
+ map addCitationSigns (strOpsFilteredSymbols ++ literalsVar)
+
+ -- Defining the variables for the lexer.
+ plyTokens =
+ "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")"
+ plyTokensRegEx = map createRegEx strOpsAndSymbols
+
+ tokensPly :: [(String, String)]
+ tokensPly = reservedWordsEnv ++ strOpsAndSymbols
+
+ -- Comments
+ (multiMatchers, singleMatchers) = comments cf
+ singleComments = map createLineCommentMatcher singleMatchers
+ multiComments = map createMultiLineCommentMatcher multiMatchers
+
+
+-- | Creates tokens for the lexer, such as "t_S_43 = r'\+'".
+createRegEx :: (String, String) -> String
+createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'"
+
+
+-- | For single-line comments
+createLineCommentMatcher :: String -> String
+createLineCommentMatcher r = unlines
+ [ "def t_C" ++ (toOrd r) ++ "(t):"
+ , "\tr'" ++ concat (map escapeChar r) ++ ".*'"
+ , "\tpass"
+ ]
+
+
+-- | For multi-line comments
+createMultiLineCommentMatcher :: (String, String) -> String
+createMultiLineCommentMatcher (s, e) = unlines
+ [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):"
+ , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'"
+ , "\tpass"
+ ]
+ where
+ escaped s = concat $ map escapeChar s
+
+
+-- | The reserved_map contains mappings for reserved keywords,
+-- such as 'int' : 'R_105_110_116'.
+createReservedMap :: [(String, String)] -> String
+createReservedMap xs = unlines
+ [ "reserved_map = {"
+ , unlines rows
+ , "}"
+ , ""
+ , "reserved = ("
+ , unlines rowsSnd
+ , ")"
+ ]
+ where
+ rows :: [String]
+ rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs]
+
+ rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs]
+
+
+-- | Creates lexer definitions for the lexer which are interpreted using
+-- the inspect module to retrieve useful information, for example:
+-- def t_String(t):
+-- r'"[^"]+"'
+-- t.type = reserved_map.get(t.value, 'String')
+-- return t
+plyLiterals :: CF -> String
+plyLiterals cf = unlines $ concat
+ [
+ ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""]
+ , ifC catChar
+ [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLexFunc "Integer" "\\d+"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"]
+ -- If there is no Ident present, we need a lexer definition for reserved
+ -- words:
+ , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0
+ then [createLexFunc "" "[A-Za-z]\\w*"]
+ else []
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ ]
+
+
+-- | Creates a Lexing definition for a Literal
+-- If no Literal name is used, this is just a reserved_map lookup.
+createLexFunc :: String -> String -> String
+createLexFunc name regex = unlines
+ [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):"
+ , "\tr'" ++ regex ++ "'"
+ , if name /= ""
+ then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')"
+ else "\tt.type = reserved_map.get(t.value)"
+ , "\treturn t"
+ ]
+
+
+-- | Adds lexer definitions to ignore whitespace, and a testing block
+-- which attempts to tokenize some input, like: python3 LexTokens.py < input
+footer :: String
+footer = unlines
+ [ "# Ignored characters:"
+ , "t_ignore = ' \\t'"
+ , ""
+ , "# Ignored token with an action associated with it:"
+ , "def t_ignore_newline(t):"
+ , "\tr'\\n+'"
+ , "\tt.lexer.lineno += t.value.count('\\n')"
+ , ""
+ , "# Error handler for illegal characters:"
+ , "def t_error(t):"
+ , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))"
+ , "\tquit()"
+ , ""
+ , "if __name__ == \"__main__\":"
+ , "\tlexer = lex.lex()"
+ , "\tlex.runmain(lexer)"
+ ]
+
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
new file mode 100644
index 00000000..20255c19
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -0,0 +1,409 @@
+
+{-
+ BNF Converter: Python pretty-printer generator
+ Copyright (C) 2024 Author: Bjorn Werner
+ Based on CFtoCPrinter.hs, Copyright (C) 2004 Michael Pellauer
+-}
+
+module BNFC.Backend.Python.CFtoPyPrettyPrinter ( cf2PyPretty ) where
+import Data.List ( intercalate, nub )
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+import BNFC.Backend.Common.StrUtils
+import qualified Data.List.NonEmpty as List1
+
+-- | Used to create PrettyPrinter.py, that contains the functionality
+-- to print the AST and the linearized tree.
+cf2PyPretty :: String -> CF -> String
+cf2PyPretty pkgName cf = unlines
+ [ "from " ++ pkgName ++ ".Absyn import *"
+ , ""
+ , makePrintAST cf
+ , ""
+ , makeListDecons cf
+ , ""
+ , makeRenderC
+ , ""
+ , makeCoercCompare cf
+ , ""
+ , makeCompareFunc
+ , ""
+ , makeLinFunc cf
+ ]
+
+
+-- | Creates the print AST function.
+makePrintAST :: CF -> String
+makePrintAST cf = concat
+ [ "def printAST(ast: object) -> list:\n"
+ , " match ast:\n"
+ , concat
+ [ ifUsedThen catInteger
+ [ " case Integer():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catDouble
+ [ " case Double():"
+ , " if ast.is_integer():"
+ , " return str(int(ast))"
+ , " else:"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catString
+ [ " case String():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catChar
+ [ " case Char():"
+ , " return str(ast)"
+ ]
+ , ifUsedThen catIdent
+ [ " case Ident():"
+ , " return '\"' + str(ast) + '\"'"
+ ]
+ ]
+ , if length (tokenNames cf) > 0
+ then unlines
+ [ " case (" ++ intercalate " | " (map (++"()") (tokenNames cf))
+ ++ "):"
+ , " return '\"' + str(ast) + '\"'"
+ ]
+ else ""
+ , " case list():\n"
+ , " return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n"
+ , "\n"
+ , " if len(vars(ast)) > 0:\n"
+ , " return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
+ , " else:\n"
+ , " return ast.__class__.__name__\n"
+ ]
+ where
+ ifUsedThen :: TokenCat -> [String] -> String
+ ifUsedThen cat ss
+ | isUsedCat cf (TokenCat cat) = unlines ss
+ | otherwise = ""
+
+
+-- Creates deconstructors for all list categories.
+makeListDecons :: CF -> String
+makeListDecons cf = unlines $ map (makeListDecon cf) listCats
+ where
+ rules = cfgRules cf
+ valCats = nub $ map valCat rules
+ listCats = [c | c <- valCats, isList c]
+
+
+-- Creates a deconstructor for some list category.
+makeListDecon :: CF -> Cat -> String
+makeListDecon cf c = concat
+ [ "def list" ++ name ++ "Decon(xs):\n"
+ , oneRuleStr
+ , nilRuleStr
+ , consRuleStr
+ , "\n"
+ ]
+ where
+ name = show $ catOfList c
+ listRulesForCat = rulesForCat cf c
+
+ nilRule = case [r | r <- listRulesForCat, isNilFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+ oneRule = case [r | r <- listRulesForCat, isOneFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+ consRule = case [r | r <- listRulesForCat, isConsFun r] of
+ [] -> Nothing
+ rs -> Just (head rs)
+
+ -- List rules are of the form:
+ -- [C] ::= symbols.. C symbols.. [C]
+ -- The production, in Python, is concatenated recursively:
+ -- symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols..
+ sentFormToArgs :: Int -> [Either Cat String] -> String
+ sentFormToArgs _ [] = "[]"
+ sentFormToArgs v (Right strOp:ecss) =
+ "['" ++ escapeChars strOp ++ "'] + " ++
+ sentFormToArgs v ecss
+ sentFormToArgs v (Left _:ecss)
+ | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss
+ | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
+ sentFormToArgs (v+1) ecss
+ | otherwise = error "A list production can max have C and [C]."
+
+ nilRuleStr = case nilRule of
+ Nothing -> ""
+ Just r -> unlines
+ [ " if len(xs) == 0:"
+ , " return " ++ sentFormToArgs 0 (rhsRule r)
+ ]
+
+ oneRuleStr = case oneRule of
+ Nothing -> ""
+ Just r -> unlines
+ [ " if len(xs) == 1:"
+ , " return " ++ sentFormToArgs 0 (rhsRule r)
+ ]
+
+ consRuleStr = case consRule of
+ Nothing -> ""
+ Just r -> " return " ++ sentFormToArgs 0 (rhsRule r) ++ "\n"
+
+
+-- | Creates the renderC function, which renders a list of strings into a
+-- single string, inserting whitespace to present the language in a C-like
+-- manner.
+makeRenderC :: String
+makeRenderC = unlines
+ [ "def renderC(ss: list):"
+ , " def br(i):"
+ , " return '\\n' + ' ' * iLevel"
+ , ""
+ , " def ident(i):"
+ , " return ' ' * iLevel"
+ , ""
+ , " def removeTrailingWhitespace(tot):"
+ , " i = len(tot)"
+ , " while i > 0:"
+ , " if tot[i] == ' ':"
+ , " i -= 1"
+ , " else:"
+ , " break"
+ , ""
+ , " return tot[:i]"
+ , ""
+ , " def oneEmptyLine(tot):"
+ , " tot = tot.rstrip(' ')"
+ , " if len(tot) > 0 and tot[-1] != '\\n':"
+ , " tot += '\\n'"
+ , " tot += ident(iLevel)"
+ , " return tot"
+ , ""
+ , " tot = ''"
+ , " iLevel = 0"
+ , " for i in range(len(ss)):"
+ , " s = ss[i]"
+ , " match s:"
+ , " case '{':"
+ , " tot = oneEmptyLine(tot)"
+ , " iLevel += 1"
+ , " tot += '{' + br(iLevel)"
+ , " case ('(' | '['):"
+ , " tot += s"
+ , " case (')' | ']'):"
+ , " tot = tot.rstrip()"
+ , " tot += s + ' '"
+ , " case '}':"
+ , " iLevel -= 1"
+ , " tot = oneEmptyLine(tot)"
+ , " tot += s + br(iLevel)"
+ , " case ',':"
+ , " tot = tot.rstrip()"
+ , " tot += s + ' '"
+ , " case ';':"
+ , " tot = tot.rstrip()"
+ , " tot += s + br(iLevel)"
+ , " case '':"
+ , " tot += ''"
+ , " case ' ':"
+ , " tot += s"
+ , " case _:"
+ , " tot += s + ' '"
+ , ""
+ , " return tot"
+ ]
+
+
+-- Provides a mapping from a rule to its value category.
+makeCoercCompare :: CF -> String
+makeCoercCompare cf = concat
+ [ "cdict = {\n"
+ , unlines (map (\(fs, cs) -> " " ++ fs ++ " : '" ++ cs ++ "',") scs)
+ , "}"
+ ]
+ where
+ scs :: [(String, String)]
+ scs = [(funName r, (show . wpThing . valRCat) r) | r <- cfgRules cf,
+ not (isCoercion r), not (isNilCons r), not (isDefinedRule r)]
+
+
+-- | Creates a function that attempts to figure out if
+-- parentheses are required, for example:
+-- 1 + (2 * 3)
+-- The precedence for the addition is low, say Exp, but the multiplication
+-- has a higher precedence, say Exp1, so parentheses are needed.
+makeCompareFunc :: String
+makeCompareFunc = unlines
+ [ "def c(ast, cat: str) -> list:"
+ , " cl = ast.__class__"
+ , " if cl in cdict:"
+ , " clCat = cdict[cl]"
+ , " clCatAlphas = ''.join(filter(str.isalpha, clCat))"
+ , " catAlphas = ''.join(filter(str.isalpha, cat))"
+ , " clCatNums = ''.join(filter(str.isnumeric, clCat))"
+ , " catNums = ''.join(filter(str.isnumeric, cat))"
+ , " clCatNum = 0"
+ , " catNum = 0"
+ , " if clCatAlphas == catAlphas:"
+ , " if len(clCatNums) > 0:"
+ , " clCatNum = int(clCatNums)"
+ , " if len(catNums) > 0:"
+ , " catNum = int(catNums)"
+ , " if clCatNum < catNum:"
+ , " return ['('] + lin(ast) + [')']"
+ , " return lin(ast)"
+ ]
+
+
+-- | Returns the AST as a list of strings, which can be sent into the
+-- renderC function.
+makeLinFunc :: CF -> String
+makeLinFunc cf = unlines
+ [ "def lin(ast: object) -> list:"
+ , " match ast:"
+ , concat
+ [ ifUsedThen catInteger
+ [ " case Integer():"
+ , " return [str(ast)]"
+ ]
+ , ifUsedThen catDouble
+ [ " case Double():"
+ , " if ast.is_integer():"
+ , " return [str(int(ast))]"
+ , " else:"
+ , " return [str(ast)]"
+ ]
+ , ifUsedThen catString
+ [ " case String():"
+ , " return [ast, ' ']"
+ ]
+ , ifUsedThen catIdent
+ [ " case Ident():"
+ , " return [ast]"
+ ]
+ , ifUsedThen catChar
+ [ " case Char():"
+ , " return [ast]"
+ ]
+ ]
+ , " # skeleTokenCases:"
+ , unlines skeleTokenCases
+ , " # skeleRuleCases:"
+ , unlines skeleRuleCases
+ , -- Deals with cases where the entrypoint is say [Stm] or [Exp],
+ -- with pattern matching on the first object in the list.
+ " case " ++ "list():"
+ , " if len(ast) == 0:"
+ , " return []"
+ , " else:"
+ , " match ast[0]:"
+ , unlines listEntrypointCases
+ , " case _:"
+ , " raise Exception(ast[0].__class__.__name__, " ++
+ "'unmatched ast[0]')"
+ , " case _:"
+ , " raise Exception(str(ast.__class__) + ' unmatched')"
+ ]
+ where
+ -- Used to include standard literals, if needed.
+ ifUsedThen :: TokenCat -> [String] -> String
+ ifUsedThen cat ss
+ | isUsedCat cf (TokenCat cat) = unlines ss
+ | otherwise = ""
+
+ -- Figures out the delimiters for the separators and terminators,
+ -- to further process a deconstructed object that contains list(s).
+ rules = [r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isDefinedRule r)
+ , not (isNilCons r)
+ ]
+
+ skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
+ skeleRuleCases = map makeSkeleRuleCase rules
+
+ catEntrypointsForLists =
+ [catOfList c | c <- (List1.toList . allEntryPoints) cf, isList c]
+
+ -- The Haskell backend defaults to the production for the lowest
+ -- precedence for lists that are defined. Like ``separator Exp1 ","``.
+ lowestPrecListCats = [c | c <- catEntrypointsForLists,
+ precCat c == (minimum (map precCat
+ [c2 | c2 <- catEntrypointsForLists, normCat c == normCat c2]
+ )
+ )
+ ]
+
+ listEntrypointCases =
+ map (makeListEntrypointCase cf) lowestPrecListCats
+
+
+-- | Creates cases that check what class individual nodes might be, meaning
+-- the rule names or the token categories.
+makeListEntrypointCase :: CF -> Cat -> String
+makeListEntrypointCase cf c = concat
+ [ " case " ++ intercalate "|" constructors ++ ":\n"
+ , " return list" ++ show c ++ "Decon(ast)"
+ ]
+ where
+ constructors = if isTokenCat c
+ then [show c ++ "()"]
+ else map ((++ "()") . funName)
+ [
+ r | r <- rulesForNormalizedCat cf (normCat c),
+ not (isCoercion r),
+ not (isDefinedRule r)
+ ]
+
+
+-- Creates a case for a user defined literal, which inherits str.
+makeSkeleTokenCase :: String -> String
+makeSkeleTokenCase tokenName = concat
+ [ " case " ++ tokenName ++ "():\n"
+ , " return [ast]"
+ ]
+
+
+-- | Creates a case for some rule, with the additional information of what
+-- separator- and terminator-delimiters there are.
+makeSkeleRuleCase :: Rul RFun -> String
+makeSkeleRuleCase rule = concat
+ [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ , " # " ++ (showEcss sentForm) ++ "\n"
+ , " return " ++ if (length args > 0)
+ then (intercalate " + " args)
+ else "[]"
+ ]
+ where
+ fName = wpThing (funRule rule)
+ sentForm = rhsRule rule
+
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarNames = [render d | (c, d) <- lefts nvCats]
+
+ varNamesCommad = if length enumeratedVarNames > 0
+ then addCommas (enumeratedVarNames ++ ["_ann_type"])
+ else ""
+
+ args = ecssAndVarsToList
+ sentForm
+ enumeratedVarNames
+
+
+-- | Creates a list of a production with both terminals and non-terminals.
+ecssAndVarsToList :: [Either Cat String] -> [String] -> [String]
+ecssAndVarsToList [] _ = []
+ecssAndVarsToList (Left c:ecss) (s:ss)
+ | isList c = ["list" ++ name ++ "Decon(" ++ s ++ ")"] ++
+ ecssAndVarsToList ecss ss
+ | otherwise = ["c(" ++ s ++ ", '" ++ (show c) ++ "')"] ++
+ ecssAndVarsToList ecss ss
+ where
+ name = show $ catOfList c
+ecssAndVarsToList (Right strOp:ecss) ss =
+ ["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
new file mode 100644
index 00000000..5297fa02
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
@@ -0,0 +1,114 @@
+
+{-
+ BNF Converter: Python skeleton-code generator
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.CFtoPySkele where
+import BNFC.CF
+import BNFC.Backend.Python.PyHelpers
+import Data.Char (toLower)
+import BNFC.Backend.Common.NamedVariables
+import Text.PrettyPrint (Doc, render)
+import Data.Either (lefts)
+
+-- | Entrypoint.
+cf2PySkele :: String -> CF -> String
+cf2PySkele pkgName cf = unlines
+ [ "from ply.lex import lex"
+ , "from ply.yacc import yacc"
+ , "import sys"
+ , "from " ++ pkgName ++ ".LexTokens import *"
+ , "from " ++ pkgName ++ ".ParsingDefs import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , ""
+ , makeSkele cf
+ ]
+
+
+-- Creates first a matcher with all value categories, and underneath one
+-- matcher for each value category.
+makeSkele :: CF -> String
+makeSkele cf = unlines
+ [ "# Categories combined into one matcher"
+ , "def skeleMatcher(ast: object):"
+ , "\tmatch ast:"
+ , unlines skeleLiteralCases
+ , unlines skeleTokenCases
+ , unlines skeleRuleCases
+ , "\t\tcase _:"
+ , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')"
+ , ""
+ , "# Categories split into their own matchers"
+ , unlines matchersOnCats
+ ]
+ where
+ rules =
+ [ r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isDefinedRule r)
+ , not (isNilCons r)
+ ]
+
+ presentLiterals = ifC catInteger ++
+ ifC catDouble ++
+ ifC catString ++
+ ifC catIdent ++
+ ifC catChar
+
+ skeleLiteralCases = map makeSkeleTokenCase presentLiterals
+ skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
+ skeleRuleCases = map makeSkeleRuleCase rules
+
+ parserCats = filter (not . isList) (allParserCatsNorm cf) :: [Cat]
+ rulesfornormalizedcat = map (rulesForNormalizedCat cf) parserCats
+ parserCatsWithRules = zip parserCats rulesfornormalizedcat
+
+ matchersOnCats = map makeMatcherOnCat parserCatsWithRules
+
+ ifC :: TokenCat -> [String]
+ ifC cat = if isUsedCat cf (TokenCat cat) then [cat] else []
+
+
+-- Creates a matcher for some value category.
+makeMatcherOnCat :: (Cat, [Rul RFun]) -> String
+makeMatcherOnCat (c, rules) = unlines
+ [ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):"
+ , "\tmatch " ++ varName ++ ":"
+ , unlines cases
+ ,"\t\tcase _:"
+ ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ ]
+ where
+ varName = map toLower (show c) ++ "_"
+ cases = map makeSkeleRuleCase (filter
+ (\r -> not (isCoercion r) && not (isDefinedRule r))
+ rules)
+
+
+-- | Creates a case for some rule.
+makeSkeleRuleCase :: Rul RFun -> String
+makeSkeleRuleCase rule = concat
+ [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ , "\t\t\t# " ++ (showEcss sentForm) ++ "\n"
+ , "\t\t\traise Exception('" ++ fName ++ " not implemented')"
+ ]
+ where
+ funcRStr = funRule rule :: RString
+ fName = wpThing funcRStr :: String
+ sentForm = rhsRule rule
+
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+
+ enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
+
+ varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"])
+
+
+-- | Creates a case for a user-defined token.
+makeSkeleTokenCase :: String -> String
+makeSkeleTokenCase tokenName = concat
+ [ "\t\tcase " ++ tokenName ++ "():\n"
+ , "\t\t\traise Exception('not implemented')"
+ ]
+
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
new file mode 100644
index 00000000..300c1d9f
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -0,0 +1,80 @@
+
+{-
+ BNF Converter: Python backend helper functions
+ Copyright (C) 2024 Author: Bjorn Werner
+-}
+
+module BNFC.Backend.Python.PyHelpers where
+import Data.List ( intercalate )
+import Data.Char
+import BNFC.CF
+
+
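+-- | Joins strings with ", " between the elements.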
+addCommas :: [String] -> String
+addCommas ss = intercalate ", " ss
+
+
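+-- | Wraps a string in single quotation marks.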
+addCitationSigns :: String -> String
+addCitationSigns ss = "'" ++ ss ++ "'"
+
+
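+-- | Keeps the elements of the first list that do not occur in the second.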
+filterOut :: Eq a => [a] -> [a] -> [a]
+filterOut xs ys = filter (\x -> not (elem x ys)) xs
+
+
+-- Converts every character to unicode with an underscore in front.
+toOrd :: String -> String
+toOrd s = concat (map (("_" ++) . show . ord) s)
+
+
+-- | Converts a string of underscores and unicode numbers such as "_43_43"
+-- into "++".
+toChr :: String -> String
+toChr "" = ""
+toChr xs = map chr nrs
+ where
+ nrsStr = tail $ split '_' xs :: [String]
+ nrs = map read nrsStr :: [Int]
+
+
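+-- | Splits a string at every occurrence of the given character.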
+split :: Char -> String -> [String]
+split c s = split' c s ""
+
+
+split' :: Char -> String -> String -> [String]
+split' _ [] ps = [ps]
+split' c (s:ss) ps
+ | c == s = [ps] ++ split' c ss ""
+ | otherwise = split' c ss (ps ++ [s])
+
+
+-- Converts [Cat] into ListCat, which is mainly used in the parser.
+translateToList :: String -> String
+translateToList s
+ | strIsList s = "List" ++ (tail $ init s)
+ | otherwise = s
+
+
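+-- | Whether a category string is bracketed, such as "[Exp]".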
+strIsList :: String -> Bool
+strIsList s = head s == '[' && last s == ']'
+
+
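+-- | Returns the first Right value in the list, if any.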
+firstRight :: [Either a b] -> Maybe b
+firstRight [] = Nothing
+firstRight (Left _:es) = firstRight es
+firstRight (Right r:_) = Just r
+
+
+-- Retrieves the first character from strings such as "[Stm]" or "Stm".
+firstAlpha :: String -> Char
+firstAlpha s
+ | strIsList s = head $ tail s
+ | otherwise = head s
+
+
+-- | Converts a production into a string, for comments.
+showEcss :: [Either Cat String] -> String
+showEcss [] = ""
+showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss)
+showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
+
diff --git a/source/src/BNFC/Backend/Python/RegToFlex.hs b/source/src/BNFC/Backend/Python/RegToFlex.hs
new file mode 100644
index 00000000..37e357b4
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/RegToFlex.hs
@@ -0,0 +1,97 @@
+{-# LANGUAGE LambdaCase #-}
+
+{-
+ The name RegToFlex remains from the C backend (2024), as the code is
+ almost identical.
+-}
+
+module BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar) where
+
+-- modified from pretty-printer generated by the BNF converter
+
+import Data.Char (ord, showLitChar)
+import qualified Data.List as List
+import BNFC.Abs (Reg(..), Identifier(Identifier))
+import BNFC.Backend.Common (flexEps)
+
+
+-- the top-level printing method
+printRegFlex :: Reg -> String
+printRegFlex = render . prt 0
+
+
+-- you may want to change render and parenth
+render :: [String] -> String
+render = rend (0::Int) where
+ rend i ss = case ss of
+ "[" :ts -> cons "[" $ rend i ts
+ "(" :ts -> cons "(" $ rend i ts
+ t : "," :ts -> cons t $ space "," $ rend i ts
+ t : ")" :ts -> cons t $ cons ")" $ rend i ts
+ t : "]" :ts -> cons t $ cons "]" $ rend i ts
+ t :ts -> space t $ rend i ts
+ _ -> ""
+ cons s t = s ++ t
+ space t s = if null s then t else t ++ s
+
+
+parenth :: [String] -> [String]
+parenth ss = ["("] ++ ss ++ [")"]
+
+
+-- the printer class does the job
+class Print a where
+ prt :: Int -> a -> [String]
+
+
+prPrec :: Int -> Int -> [String] -> [String]
+prPrec i j = if j < i then parenth else id
+
+
+instance Print Identifier where
+ prt _ (Identifier (_, i)) = [i]
+
+
+instance Print Reg where
+ prt i = \case
+ RSeq reg0 reg -> prPrec i 2 (concat [prt 2 reg0 , prt 3 reg])
+ RAlt reg0 reg -> prPrec i 1 (concat [prt 1 reg0 , ["|"] , prt 2 reg])
+
+ -- Flex does not support set difference. See link for valid patterns.
+ -- https://westes.github.io/flex/manual/Patterns.html#Patterns
+ -- RMinus reg0 reg -> prPrec i 1 (concat [prt 2 reg0 , ["#"] , prt 2 reg])
+ RMinus reg0 REps -> prt i reg0 -- REps is identity for set difference
+ RMinus RAny (RChar c) -> [ concat [ "[^", escapeChar c, "]" ] ]
+ RMinus RAny (RAlts str) -> [ concat [ "[^", concatMap escapeChar str, "]" ] ]
+ -- FIXME: unicode inside brackets [...] is not accepted by flex
+ -- FIXME: maybe we could add cases for char - RDigit, RLetter etc.
+ RMinus _ _ -> error "Flex does not support general set difference"
+
+ RStar reg -> concat [ prt 3 reg , ["*"] ]
+ RPlus reg -> concat [ prt 3 reg , ["+"] ]
+ ROpt reg -> concat [ prt 3 reg , ["?"] ]
+ REps -> [ flexEps ]
+ RChar c -> [ escapeChar c ]
+ -- Unicode characters cannot be inside [...] so we use | instead.
+ RAlts str -> prPrec i 1 $ List.intersperse "|" $ map escapeChar str
+ -- RAlts str -> concat [["["], prt 0 $ concatMap escapeChar str, ["]"]]
+ RSeqs str -> prPrec i 2 $ map escapeChar str
+ RDigit -> [ "\\d" ]
+ RLetter -> [ "[A-Za-z]" ] -- add underscore ?
+ RUpper -> [ "[A-Z]" ]
+ RLower -> [ "[a-z]" ]
+ RAny -> [ "." ]
+
+
+-- | Handle special characters in regular expressions.
+escapeChar :: Char -> String
+escapeChar c
+ | c `elem` reserved = '\\':[c]
+ | let x = ord c, x >= 256 = [c]
+ -- keep unicode characters -- "\x" ++ showHex x ""
+ | otherwise = showLitChar c ""
+ where
+ reserved :: String
+ reserved = " '$+-*=<>[](){}!?.,;:^~|&%#/\\$_@\""
+
+
diff --git a/source/src/BNFC/Options.hs b/source/src/BNFC/Options.hs
index ac5fdbf6..74a1c757 100644
--- a/source/src/BNFC/Options.hs
+++ b/source/src/BNFC/Options.hs
@@ -64,6 +64,7 @@ data Target = TargetC | TargetCpp | TargetCppNoStl
| TargetHaskell | TargetHaskellGadt | TargetLatex
| TargetJava | TargetOCaml | TargetPygments
| TargetTreeSitter
+ | TargetPython
| TargetCheck
deriving (Eq, Bounded, Enum, Ord)
@@ -83,6 +84,7 @@ instance Show Target where
show TargetPygments = "Pygments"
show TargetTreeSitter = "Tree-sitter"
show TargetCheck = "Check LBNF file"
+ show TargetPython = "Python"
-- | Which version of Alex is targeted?
data AlexVersion = Alex3
@@ -261,6 +263,7 @@ printTargetOption = ("--" ++) . \case
TargetOCaml -> "ocaml"
TargetPygments -> "pygments"
TargetTreeSitter -> "tree-sitter"
+ TargetPython -> "python"
TargetCheck -> "check"
printAlexOption :: AlexVersion -> String
@@ -314,6 +317,8 @@ targetOptions =
"Output a Python lexer for Pygments"
, Option "" ["tree-sitter"] (NoArg (\o -> o {target = TargetTreeSitter}))
"Output grammar.js file for use with tree-sitter"
+ , Option "" ["python"] (NoArg (\ o -> o{target = TargetPython }))
+ "Output Python code for use with PLY"
, Option "" ["check"] (NoArg (\ o -> o{target = TargetCheck }))
"No output. Just check input LBNF file"
]
@@ -530,6 +535,7 @@ instance Maintained Target where
TargetOCaml -> True
TargetPygments -> True
TargetTreeSitter -> True
+ TargetPython -> True
TargetCheck -> True
instance Maintained AlexVersion where
@@ -661,4 +667,5 @@ translateOldOptions = mapM $ \ o -> do
, ("--ghc" , "--generic")
, ("--deriveGeneric" , "--generic")
, ("--deriveDataTypeable" , "--generic")
+ , ("-python" , "--python")
]
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index ce0c945c..13d85e7d 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -421,6 +421,10 @@ parameters = concat
, javaParams { tpName = "Java (with jflex and line numbers)"
, tpBnfcOptions = ["--java", "--jflex", "-l"] }
]
+ -- Python
+ , [ pythonParams { tpName = "Python"
+ , tpBnfcOptions = ["--python"] }
+ ]
]
where
base = baseParameters
@@ -444,6 +448,14 @@ parameters = concat
, tpBnfcOptions = ["--ocaml"]
, tpRunTestProg = haskellRunTestProg
}
+ pythonParams = base
+ { tpBuild = do
+ return () -- nothing to make or compile
+        , tpRunTestProg = \ _lang args -> do
+ pyFile_ <- findFile "genTest.py"
+ cmd "python3.10" $ pyFile_ : args
+ }
-- | Helper function that runs bnfc with the context's options and an
-- option to generate 'tpMakefile'.
From 59181603b0fcfb50b5b0782b988e1fefb7558ecc Mon Sep 17 00:00:00 2001
From: AiStudent <8739546+AiStudent@users.noreply.github.com>
Date: Tue, 27 Aug 2024 14:57:22 +0200
Subject: [PATCH 2/8] Update user_guide.rst
---
docs/user_guide.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 8273f1d5..90e9ed97 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -318,7 +318,7 @@ Example usage: ::
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
-Optionally one may with ``-m``` also create a makefile that contains the target
+Optionally one may with ``-m`` also create a makefile that contains the target
"distclean" to remove the generated files.
Testing the frontend
From 1f9a9179c22b97aea9c003ca37908efa725faa86 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 15:46:45 +0200
Subject: [PATCH 3/8] changed python3.10 commands to python3
---
docs/user_guide.rst | 6 +++---
document/BNF_Converter_Python_Mode.html | 2 +-
testing/src/ParameterizedTests.hs | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 8273f1d5..534eb1d2 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -326,15 +326,15 @@ Testing the frontend
It's possible to pipe input, like::
- echo "(1 + 2) * 3" | python3.10 genTest.py
+ echo "(1 + 2) * 3" | python3 genTest.py
or::
- python3.10 genTest.py < file.txt
+ python3 genTest.py < file.txt
and it's possible to just use an argument::
- python3.10 genTest.py file.txt
+ python3 genTest.py file.txt
Caveats
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 429b5c44..8021b3e1 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -76,7 +76,7 @@ Testing the frontend
The following example uses a frontend that is generated from a C-like grammar.
- $ python3.10 genTest.py < hello.c
+ $ python3 genTest.py < hello.c
Generating LALR tables
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index 13d85e7d..8231c8eb 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -454,7 +454,7 @@ parameters = concat
        , tpRunTestProg = \ _lang args -> do
pyFile_ <- findFile "genTest.py"
- cmd "python3.10" $ pyFile_ : args
+ cmd "python3" $ pyFile_ : args
}
-- | Helper function that runs bnfc with the context's options and an
From 8ee25b37f1472cd9a1f17c5ed0a2987f101e7613 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 27 Aug 2024 15:54:26 +0200
Subject: [PATCH 4/8] Cleanup
---
docs/user_guide.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 780cd4db..4e517da9 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -290,7 +290,7 @@ Python Backend
The BNF Converter's Python Backend generates a Python frontend, that uses
`PLY `_ (Python Lex Yacc), to parse
-input into an AST (abstract syntax tree).
+input into an abstract syntax tree.
Python 3.10 or higher is needed.
From c8d5ede6851a99e52897854c3fe98177ff22ae54 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 5 Nov 2024 13:45:29 +0100
Subject: [PATCH 5/8] Updated to Lark
---
source/BNFC.cabal | 1 -
source/src/BNFC/Backend/Python.hs | 41 +-
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 563 +++++++++++---------
source/src/BNFC/Backend/Python/CFtoPyLex.hs | 189 -------
4 files changed, 319 insertions(+), 475 deletions(-)
delete mode 100644 source/src/BNFC/Backend/Python/CFtoPyLex.hs
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index c232b401..9280a8f3 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -283,7 +283,6 @@ library
-- Python backend
BNFC.Backend.Python
BNFC.Backend.Python.CFtoPyAbs
- BNFC.Backend.Python.CFtoPyLex
BNFC.Backend.Python.CFtoPyPrettyPrinter
BNFC.Backend.Python.RegToFlex
BNFC.Backend.Python.PyHelpers
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index e0a3da35..13568e38 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -14,23 +14,20 @@ import BNFC.CF (CF, firstEntry)
import BNFC.Options (SharedOptions, optMake, lang)
import BNFC.Backend.Base (MkFiles, mkfile)
import BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs)
-import BNFC.Backend.Python.CFtoPyLex (cf2PyLex)
import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
-
-import BNFC.PrettyPrint -- For Doc
+import BNFC.PrettyPrint
import qualified BNFC.Backend.Common.Makefile as Makefile
+
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
let pkgName = "bnfcPyGen" ++ name
- let (lexerDefs, tokensPly) = cf2PyLex cf
- let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf tokensPly
+ let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
- mkPyFile (pkgName ++ "/LexTokens.py") lexerDefs
mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
@@ -56,11 +53,9 @@ makefile pkgName optMakefileName basename = vcat
, Makefile.mkRule "vclean" []
[ "rm -f " ++ unwords
[
- pkgName ++ "/LexTokens.py",
pkgName ++ "/ParsingDefs.py",
pkgName ++ "/Absyn.py",
pkgName ++ "/PrettyPrinter.py",
- pkgName ++ "/LexTokens.py.bak",
pkgName ++ "/ParsingDefs.py.bak",
pkgName ++ "/Absyn.py.bak",
pkgName ++ "/PrettyPrinter.py.bak",
@@ -74,7 +69,6 @@ makefile pkgName optMakefileName basename = vcat
"rmdir " ++ pkgName,
"rm -f __pycache__/*.pyc",
"rm -fd __pycache__",
- "rm -f parser.out parsetab.py",
"rm -f " ++ makefileName,
"rm -f " ++ makefileName ++ ".bak"
]
@@ -94,17 +88,14 @@ comment x = "# " ++ x
pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
[
- "from ply.lex import lex",
- "from ply.yacc import yacc",
"import sys",
- "from " ++ pkgName ++ ".LexTokens import *",
"from " ++ pkgName ++ ".ParsingDefs import *",
"from " ++ pkgName ++ ".PrettyPrinter import *",
"",
"",
"# Suggested input options:",
- "# python3.10 genTest.py < sourcefile",
- "# python3.10 genTest.py sourcefile inputfile (i.e. for interpreters).",
+ "# python3 genTest.py < sourcefile",
+ "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).",
"inputFile = None",
"if len(sys.argv) > 1:",
"\tf = open(sys.argv[1], 'r')",
@@ -117,22 +108,14 @@ pyTest pkgName cf = unlines
"\tfor line in sys.stdin:",
"\t\tinp += line",
"",
- "",
- "# Customizable error handling for the parsing",
- "def p_error(p: lex.LexToken):",
- "\tif p is None:",
- "\t\tprint('No rule could reduce the tokenized input')",
- "\telse:",
- "\t\tprint('line:', p.lineno, 'lexpos:', p.lexpos, f'Syntax error at {p.value!r}')",
- "\t\tp.lexer.syntaxError = True",
- "",
+ "def onError(e):",
+ " print(e)",
+ " print('Parse failed')",
+ " quit(1)",
"",
"# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
- "lexer = lex.lex()",
- "parser = yacc(start=" ++ defaultEntry ++ ")",
- "lexer.syntaxError = False",
- "ast = parser.parse(inp, lexer=lexer)",
- "if ast and not lexer.syntaxError:",
+ "ast = parser.parse(inp, on_error=onError)",
+ "if ast: # and not lexer.syntaxError:",
"\tprint('Parse Successful!\\n')",
"\tprint('[Abstract Syntax]')",
"\tprint(printAST(ast))",
@@ -144,6 +127,4 @@ pyTest pkgName cf = unlines
"\tprint('Parse failed')",
"\tquit(1)"
]
- where
- defaultEntry = (addCitationSigns . translateToList . show . firstEntry) cf
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index a4712fb0..adad97f3 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -9,45 +9,47 @@ module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
import Data.List ( nub, intercalate )
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
+import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
-import Data.Char (toLower)
+import Data.Char (toLower, toUpper)
import qualified Data.List.NonEmpty as List1
+
-- | The result is ParsingDefs.py & Absyn.py
cf2PyAbs
:: String
-> CF -- ^ Grammar.
- -> [(String, String)] -- Tokens to unicode mapping
-> (String, String) -- ParsingDefs.py, Absyn.py.
-cf2PyAbs pkgName cf tokensPly = ( unlines
- [ "from " ++ pkgName ++ ".Absyn import *"
- , "\n\n" ++ createCommonEntrypointDef cf
- , "\n\n" ++ (unlines parsingDefs)
- , if length definesParsingDefs > 0
- then "\n\n# Parsing rules from defines"
- else ""
- , "\n\n" ++ unlines definesParsingDefs
+cf2PyAbs pkgName cf = ( unlines
+ [ "from lark import Lark, Transformer, v_args"
+ , "from dataclasses import dataclass"
+ , "from " ++ pkgName ++ ".Absyn import *"
+ , ""
+ , createGrammar cf
+ , createTransformer cf
+ , ""
+ , "# Create Lark parser with the given grammar"
+ , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++
+ "transformer=TreeTransformer())"
+ , ""
+ ]
+ , unlines
+ ["from typing import List as _List"
+ ,"# Value categories (no coercsions):"
+ , unlines valueCatsClasses
+ , ""
+ , placeholderVariableClass
+ , ""
+ ,"# Rules:"
+ ,"from dataclasses import dataclass, field"
+ ,"\n" ++ (unlines dataClasses)
]
- , "from typing import List as _List" ++
- "\n\n# Value categories (no coercsions):" ++
- "\n\n" ++ unlines valueCatsClasses ++
- "\n\n" ++ placeholderVariableClass ++
- "\n\n# Rules:" ++
- "\n" ++ "from dataclasses import dataclass, field" ++
- "\n\n" ++ (unlines dataClasses)
)
where
rules = cfgRules cf
-
- -- To create ParsingDefs.py
- parsingDefs :: [String]
- parsingDefs = map (ruleToParsingDef cf tokensPly)
- [r | r <- rules, isParsable r, not (isDefinedRule r)]
- definesParsingDefs = makeDefineParsingDefs cf tokensPly
-
-- To create Absyn.py
dataClasses :: [String]
dataClasses = map makePythonClass
@@ -69,10 +71,289 @@ cf2PyAbs pkgName cf tokensPly = ( unlines
, "Integer(int)"
, "Double(float)"
]
-
valueCatsClasses = map createValueCatClass valueCatNames
-
+
+-- Creates a grammar for Lark. Note that it is a raw string (r"...").
+createGrammar :: CF -> String
+createGrammar cf = unlines
+ [ "grammar = r\"\"\""
+ , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf)
+ , ""
+ , unlines orClauses
+ , larkLiterals cf
+ , unlines singleComments
+ , unlines multiComments
+ , " %import common.WS"
+ , " %ignore WS"
+ , "\"\"\""
+ ]
+ where
+ aCats = reallyAllCats cf
+ rs = cfgRules cf
+
+ enumeratedRules :: [(Int, Rul RFun)]
+ enumeratedRules = enumerateAllDefinedRules rs 1 []
+ orClauses = map (createOrClause cf enumeratedRules) aCats
+
+ (multiMatchers, singleMatchers) = comments cf
+ singleComments = map createLineCommentMatcher singleMatchers
+ multiComments = map createMultiLineCommentMatcher multiMatchers
+
+
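As a hedged illustration of createGrammar's output: for a small grammar with rules like ``EAdd. Exp ::= Exp "+" Exp1`` and ``EInt. Exp1 ::= Integer``, the generated string would have roughly this shape (names follow the conventions above, but the exact output may differ)::

    # Hypothetical shape of the generated Lark grammar; not verbatim output.
    grammar = r"""
        ?start: exp

        ?exp: exp "+" exp1 -> r_eadd
            | exp1

        ?exp1: INTEGER -> r_eint

        INTEGER.2: /\d+/

        %import common.WS
        %ignore WS
    """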
+-- Enumerates all rules, numbering only the defined ones, to prevent name clashes.
+enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
+ -> [(Int, Rul RFun)]
+enumerateAllDefinedRules [] _ irs = irs
+enumerateAllDefinedRules (r:rs) n irs
+ | isDefinedRule r = enumerateAllDefinedRules rs (n+1) (irs ++ [(n, r)])
+ | otherwise = enumerateAllDefinedRules rs n (irs ++ [(0, r)])
+
+
+-- Creates an or clause with all rules for a given category.
+createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String
+createOrClause cf irs c = unlines
+ [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
+ intercalate "\n | "
+ (map createProdAndNameForRule catsIrs)
+ ]
+ where
+ catsIrs = [(n, removeWhiteSpaceSeparators r) | (n, r) <- irs,
+ valCat r == c, isParsable r]
+
+
+-- Creates an entry for an or clause.
+createProdAndNameForRule :: (Int, Rul RFun) -> String
+createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
+ if (not (isCoercion r)) then " -> " ++ map toLower name else ""
+ where
+ name
+ | isNilFun r = "nil" ++ (identCat . valCat) r
+ | isOneFun r = "one" ++ (identCat . valCat) r
+ | isConsFun r = "cons" ++ (identCat . valCat) r
+ | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
+ | otherwise = "r_" ++ funName r
+
+
+-- Creates the literals for a grammar for Lark.
+larkLiterals :: CF -> String
+larkLiterals cf = unlines $ concat
+ [
+ ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""]
+ , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLiteral "Integer.2" "\\d+"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"]
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ ]
+
+ createLiteral :: String -> String -> String
+ createLiteral name regex =
+ " " ++ map toUpper name ++ ": /" ++ regex ++ "/"
+
+
+-- Creates the class transformer, where each member method tells Lark how
+-- to transform some parsed node in the tree.
+createTransformer :: CF -> String
+createTransformer cf = unlines
+ [ "#transformer"
+ , "class TreeTransformer(Transformer):"
+ , unlines (map createRuleTransform rs)
+ , unlines (map (makeDefineTransform cf) enumeratedRDs)
+ , unlines (map createListTransform listRules)
+ , createTokenTransformers cf
+ ]
+ where
+ enumeratedRules :: [(Int, Rul RFun)]
+ enumeratedRules = enumerateAllDefinedRules (cfgRules cf) 1 []
+
+ rs = [r | r <- cfgRules cf
+ , not (isCoercion r)
+ , not (isNilCons r)
+ , not (isDefinedRule r)]
+ listRules = [r | r <- cfgRules cf, isNilCons r]
+
+ enumeratedRDs = [(n, r, d) | (n, r) <- enumeratedRules, d <- definitions cf
+ , not (isCoercion r)
+ , not (isNilCons r)
+ , isDefinedRule r
+ , nameCorresponds ((wpThing . defName) d) (funName r)]
+
+
+-- Creates a transform for a rule
+createRuleTransform :: Rul RFun -> String
+createRuleTransform r = unlines
+ [ " @v_args(inline=True)"
+ , " def r_" ++ map toLower (funName r) ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
+ ]
+ where
+ sentForm = rhsRule r
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+
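For a rule ``EAdd. Exp ::= Exp "+" Exp1``, the method emitted by createRuleTransform looks roughly as follows; the argument names come from numVars and are assumed here::

    # Hypothetical excerpt of the generated TreeTransformer:
    from lark import Transformer, v_args

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def r_eadd(self, exp_1, exp_2):
            return EAdd(exp_1, exp_2)  # EAdd is defined in the generated Absyn.py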
+
+-- Creates a transform for a list rule.
+createListTransform :: Rul RFun -> String
+createListTransform r = unlines
+ [ " @v_args(inline=True)"
+ , " def " ++ map toLower name ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ args
+ ]
+ where
+ name
+ | isNilFun r = "nil" ++ (identCat . valCat) r
+ | isOneFun r = "one" ++ (identCat . valCat) r
+ | isConsFun r = "cons" ++ (identCat . valCat) r
+ | otherwise = funName r
+
+ sentForm = rhsRule r
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+
+ args :: String
+ | isNilFun r = "[]"
+ | isOneFun r = "[" ++ head enumeratedVars ++ "]"
+ | isConsFun r = "[" ++ head enumeratedVars ++ "] + " ++
+ last enumeratedVars
+ | otherwise = error "Should be a list function"
+
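Similarly, for ``separator Exp ","`` the emitted list transforms would be roughly (method and variable names assumed)::

    # Hypothetical list methods on the generated TreeTransformer:
    @v_args(inline=True)
    def nillistexp(self):
        return []

    @v_args(inline=True)
    def conslistexp(self, exp, listexp):
        return [exp] + listexp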
+
+-- Creates the transformer functions for the tokens.
+createTokenTransformers :: CF -> String
+createTokenTransformers cf = unlines $ concat
+ [
+ ifC catString [createTokenTransform "String"]
+ , ifC catChar [createTokenTransform "Char"]
+ , ifC catDouble [createTokenTransform "Double"]
+ , ifC catInteger [createTokenTransform "Integer"]
+ -- Prolog requires user defined tokens to have priority over Ident; C
+ -- requires Double to have priority over user defined tokens, as C has
+ -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
+ -- for priority, not the length.
+ , userDefTokens
+ , ifC catIdent [createTokenTransform "Ident"]
+ ]
+ where
+ ifC :: TokenCat -> [String] -> [String]
+ ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
+
+ userDefTokens :: [String]
+ userDefTokens = [
+ createTokenTransform name | (name, exp) <- tokenPragmas cf
+ ]
+
+
+-- Creates a transform for a token.
+createTokenTransform :: String -> String
+createTokenTransform name = unlines
+ [ " @v_args(inline=True)"
+ , " def " ++ map toUpper name ++ "(self, token):"
+ , " return " ++ name ++ "(token.value)"
+ ]
+
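For the built-in Integer token, createTokenTransform expands to::

    # Method on the generated TreeTransformer, per the code above:
    @v_args(inline=True)
    def INTEGER(self, token):
        return Integer(token.value)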
+
+-- | Produces the production in the docstring for the parsing definitions.
+prodToDocStr ::[Either Cat String] -> String
+prodToDocStr [] = ""
+prodToDocStr (ec:[]) = ecsToDocStr ec
+prodToDocStr (ec:ecs) =
+ ecsToDocStr ec ++ " " ++ prodToDocStr ecs
+
+
+-- Converts a single element in the production.
+ecsToDocStr :: Either Cat String -> String
+ecsToDocStr (Left (TokenCat t)) = map toUpper t
+ecsToDocStr (Left c) = map toLower (translateToList (show c))
+ecsToDocStr (Right strOp) = "\"" ++ concat (map escapeBackslash strOp) ++ "\""
+
+
+-- | For single-line comments
+createLineCommentMatcher :: String -> String
+createLineCommentMatcher r = unlines
+ [ " C" ++ toOrd r ++ ": /" ++ concat (map escapeChar r) ++ "[^\\n]*/"
+ , " %ignore C" ++ toOrd r
+ ]
+
+
+-- | For multi-line comments
+createMultiLineCommentMatcher :: (String, String) -> String
+createMultiLineCommentMatcher (s, e) = unlines
+ [ " C" ++ toOrd (s ++ e) ++ ": /" ++ escaped s ++ "([\\s\\S]*?)" ++
+ escaped e ++ "/"
+ , " %ignore C" ++ toOrd (s ++ e)
+ ]
+ where
+ escaped s = concat $ map escapeChar s
+
+
+-- Since the grammar is a raw string, r""" ... """, it seems we can't
+-- escape everything in strOp the way regToFlex does; only backslashes.
+escapeBackslash :: Char -> String
+escapeBackslash '\\' = "\\\\"
+escapeBackslash c = [c]
+
+
+-- | To compare names for defines. The first letter needs to be lowered, so
+-- "while" == "While".
+nameCorresponds :: String -> String -> Bool
+nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
+nameCorresponds _ _ = error "Names can't be empty"
+
+
+-- Creates a transformer for a rule with its corresponding define.
+makeDefineTransform ::
+ CF -> (Int, Rul RFun, Define) -> String
+makeDefineTransform cf (n, defRule, defi) = unlines
+ [ " @v_args(inline=True)"
+ , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
+ concat (map (", " ++) enumeratedVars) ++ "):"
+ , " return " ++ expToDef env2 (defBody defi)
+ , ""
+ ]
+ where
+ name = (wpThing . defName) defi
+ sentForm = rhsRule defRule
+ args = map fst (defArgs defi)
+ nvCats = numVars sentForm :: [Either (Cat, Doc) String]
+ enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ env2 = zip args enumeratedVars
+
+
+-- | Converts the production of a define, called an expression, to a
+-- production for the parsing definition.
+expToDef :: [(String, String)] -> Exp -> String
+expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
+expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
+ expToDef env recList
+expToDef _ (App "[]" _ _) = "[]"
+expToDef env (App fName _ exps) =
+ fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
+expToDef env (Var s) = case lookup s env of
+ Just p -> p
+ Nothing -> error "Missing variable in define enviroment"
+expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
+expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
+expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
+expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+
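Putting makeDefineTransform and expToDef together, a define such as ``twice x = EAdd x x`` would yield roughly this method; the numbering and the variable name are assumptions::

    # Hypothetical transform for "define twice x = EAdd x x":
    @v_args(inline=True)
    def d1_r_twice(self, exp):
        return EAdd(exp, exp)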
+
+-- A placeholder variable to store additional information, e.g. type
+-- annotations.
placeholderVariableClass :: String
placeholderVariableClass = unlines
[ "# Placeholder to add additional information to a node in the AST," ++
@@ -98,6 +379,7 @@ placeholderVariableClass = unlines
, " return str(self.__v.__class__)"
]
+
-- | Creates a parsing definition that points to all entrypoints.
createCommonEntrypointDef :: CF -> String
createCommonEntrypointDef cf = unlines
@@ -126,37 +408,6 @@ createValueCatClass :: String -> String
createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
--- | Creates a parsing definition, by checking what type of rule it is and
--- calling the corresponding make function.
-ruleToParsingDef :: CF -> [(String, String)] -> Rul RFun -> String
-ruleToParsingDef cf tokensPly rule
- | isCoercion funcRStr =
- makeParseCoercion cf tokensPly funcCat (fName, sentForm)
- | isNilFun funcRStr =
- makeParseNil tokensPly funcCat (fNameTranslated, sentForm)
- | isOneFun funcRStr =
- makeParseOne cf tokensPly funcCat (fNameTranslated, sentForm)
- | isConsFun funcRStr =
- makeParseCons cf tokensPly funcCat (fNameTranslated, sentForm)
- | isDefinedRule rule =
- error "Should not generate define rules in this step"
- | otherwise =
- makeParseFunc cf tokensPly funcCat (fName, sentForm)
- where
- funcRStr = funRule rule :: RString
- fName = wpThing funcRStr :: String
-
- funcCat = valCat rule :: Cat
- catStr = show (valCat rule) :: String
-
- fNameTranslated :: String
- fNameTranslated
- | isNilFun funcRStr = catStr
- | otherwise = fName
-
- sentForm = rhsRule rule :: [Either Cat String]
-
-
-- | Make a Python class from a rule's name and production.
makePythonClass :: Rul RFun -> String
makePythonClass rule =
@@ -176,207 +427,9 @@ makePythonClass rule =
["_ann_type: _AnnType = field(default_factory=_AnnType)"])
-
-- | Creates the corresponding type hinting for some member variable.
strCatToPyTyping :: String -> String
strCatToPyTyping s =
if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
--- | It could be this is only guarding against list categories.
-literalsToPytypeMaybe :: CF -> String -> Maybe String
-literalsToPytypeMaybe cf s = case s of
- "Integer" -> Just "Integer"
- "Double" -> Just "Double"
- "Char" -> Just "Char"
- "String" -> Just "String"
- "Ident" -> Just "Ident"
- _ -> if s `elem` (tokenNames cf) then Just s else Nothing
-
-
--- | The following makeParse functions create their corresponding parsing
--- definitions for some rule.
-makeParseFunc :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseFunc cf tokensPly dataCat (name, sentForm) = unlines
- [ "def " ++ "p_" ++ name ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- rhs = name ++ "(" ++ (addCommas (getLeftIndexes cf 1 sentForm)) ++ ")"
-
-
-makeParseCoercion :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseCoercion cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ (show sourceCat) ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ (show dataCat) ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ strP ++ "\n"
- ]
- where
- strP = head (getLeftIndexes cf 1 sentForm)
- sourceCat = (head . lefts) sentForm
-
-
-makeParseNil :: [(String, String)] -> Cat -> (String, SentForm) -> String
-makeParseNil tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "Nil" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = []\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
-
-
-makeParseOne :: CF -> [(String, String)] -> Cat -> (String, SentForm) -> String
-makeParseOne cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "One" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\""
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
- rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
-
-
-makeParseCons :: CF -> [(String, String)] -> Cat -> (String, SentForm)
- -> String
-makeParseCons cf tokensPly dataCat (_, sentForm) = unlines
- [ "def " ++ "p_" ++ "Cons" ++ translatedCat ++ "(p):\n" ++ "\t" ++ "\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t" ++ "\"\"\"" ++ "\n"
- , "\t" ++ "p[0] = " ++ rhs ++ "\n"
- ]
- where
- translatedCat = translateToList $ show dataCat
- rhs = intercalate " + " (getLeftIndexesLists tokensPly cf 1 sentForm)
-
-
--- | Produces a list of the elements in the code production, where the indices
--- match the argument categories.
-getLeftIndexesLists :: [(String, String)] -> CF -> Int -> [Either Cat String]
- -> [String]
-getLeftIndexesLists _ _ _ [] = []
-getLeftIndexesLists tokensPly cf n (Left c:ecs)
- | isList c = [typedPTerm] ++ (getLeftIndexesLists tokensPly cf (n+1) ecs)
- | otherwise = ["[" ++ typedPTerm ++ "]"] ++
- (getLeftIndexesLists tokensPly cf (n+1) ecs)
- where
- pTerm = "p[" ++ (show n) ++ "]"
- typedPTerm = case literalsToPytypeMaybe cf (show c) of
- Just s -> s ++ "(" ++ pTerm ++ ")"
- Nothing -> pTerm
-getLeftIndexesLists tokensPly cf n (Right strOp:ecs)
- | separatorIsEmpty tokensPly strOp = getLeftIndexesLists tokensPly cf n ecs
- | otherwise = getLeftIndexesLists tokensPly cf (n+1) ecs
-
-
--- | In case the deliminator is "" or is not defined for the lexer, like
--- ignored characters.
-separatorIsEmpty :: [(String, String)] -> String -> Bool
-separatorIsEmpty tokensPly strOp
- | length strOp > 0 = case lookup strOp tokensPly of
- Just _ -> False
- Nothing -> True
- | otherwise = True
-
-
--- | Produces a list of the elements in the code production, where the indices
--- match the argument categories.
-getLeftIndexes :: CF -> Int -> [Either Cat String] -> [String]
-getLeftIndexes _ _ [] = []
-getLeftIndexes cf n (Left c:ecs) = [typedPTerm] ++
- (getLeftIndexes cf (n+1) ecs)
- where
- pTerm = "p[" ++ (show n) ++ "]"
- typedPTerm = case literalsToPytypeMaybe cf (show c) of
- Just s -> s ++ "(" ++ pTerm ++ ")"
- Nothing -> pTerm
-getLeftIndexes cf n (Right _:ecs) = getLeftIndexes cf (n+1) ecs
-
-
--- | Produces the production in the docstring for the parsing definitions.
-prodToDocStr :: [(String, String)] -> [Either Cat String] -> String
-prodToDocStr _ [] = ""
-prodToDocStr tokensPly (ec:[]) = ecsToDocStr tokensPly ec
-prodToDocStr tokensPly (ec:ecs) =
- ecsToDocStr tokensPly ec ++ " " ++ prodToDocStr tokensPly ecs
-
-
--- Converts a single element in the production.
-ecsToDocStr :: [(String, String)] -> Either Cat String -> String
-ecsToDocStr _ (Left c) = translateToList $ show c
-ecsToDocStr tokensPly (Right strOp) = case lookup strOp tokensPly of
- (Just s) -> s
- Nothing -> ("") -- We assume it is no token, this affects getLeftIndexes
-
-
--- | Creating the parsing definitions for the defines.
-makeDefineParsingDefs :: CF -> [(String, String)] -> [String]
-makeDefineParsingDefs cf tokensPly = defFuncsPy
- where
- rules = cfgRules cf
-
- definedRules :: [Rul RFun]
- definedRules = [r | r <- rules, isDefinedRule r]
-
- pairs :: [(Rul RFun, Define)]
- pairs = [(dr, d) | dr <- definedRules, d <- definitions cf,
- nameCorresponds ((wpThing . defName) d) (funName dr)]
-
- -- Adds a number to the name to make each define separate.
- numberedPairs = zip [1..] pairs
- defFuncsPy = map (makeDefineParsingDef cf tokensPly) numberedPairs
-
-
--- | To compare names for defines. The first letter needs to be lowered, so
--- "while" == "While".
-nameCorresponds :: String -> String -> Bool
-nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
-nameCorresponds _ _ = error "Names can't be empty"
-
-
--- | Creates a define parsing definition.
-makeDefineParsingDef ::
- CF -> [(String, String)] -> (Int, (Rul RFun, Define)) -> String
-makeDefineParsingDef cf tokensPly (n, (defRule, defi)) = unlines
- [ "def p_D" ++ (show n) ++ name ++ "(p):"
- , "\t\"\"\""
- , "\t" ++ translatedCat ++ " : " ++ (prodToDocStr tokensPly sentForm)
- , "\t\"\"\""
- , "\t# " ++ show env
- , "\tp[0] = " ++ expToDef env (defBody defi)
- , ""
- ]
- where
- name = (wpThing . defName) defi
- translatedCat = translateToList $ (catToStr . valCat) defRule
- sentForm = rhsRule defRule
- indexes = getLeftIndexes cf 1 sentForm
- args = map fst (defArgs defi)
- env = zip args indexes
-
-
--- | Converts the production of a define, called an expression, to a
--- production for the parsing definition.
-expToDef :: [(String, String)] -> Exp -> String
-expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
-expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
- expToDef env recList
-expToDef _ (App "[]" _ _) = "[]"
-expToDef env (App fName _ exps) =
- fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
-expToDef env (Var s) = case lookup s env of
- Just p -> p
- Nothing -> error "Missing variable in define enviroment"
-expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
-expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
-expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
-expToDef _ (LitString s) = "String('" ++ show s ++ "')"
-
-
diff --git a/source/src/BNFC/Backend/Python/CFtoPyLex.hs b/source/src/BNFC/Backend/Python/CFtoPyLex.hs
deleted file mode 100644
index fd1a532a..00000000
--- a/source/src/BNFC/Backend/Python/CFtoPyLex.hs
+++ /dev/null
@@ -1,189 +0,0 @@
-
-{-
- BNF Converter: Python lexer generator
- Copyright (C) 2024 Author: Bjorn Werner
--}
-
-module BNFC.Backend.Python.CFtoPyLex ( cf2PyLex ) where
-
-import BNFC.CF
-
-import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
-import BNFC.Backend.Python.PyHelpers
-
-
--- | The entrypoint, returns LexTokens.py and the unicode mapping.
-cf2PyLex :: CF -> (String, [(String, String)])
-cf2PyLex cf = (, tokensPly) $ unlines
- [ "import ply.lex as lex\n"
- , ""
- , createReservedMap reservedWordsEnv
- , "# PLY tokens:\n" ++ plyTokens ++ "\n"
- , "# PLY tokens with RegEx:"
- , unlines plyTokensRegEx
- , "# Literals:"
- , plyLiterals cf
- , "# Comments:"
- , unlines singleComments
- , unlines multiComments
- , footer
- ]
- where
- -- The reserved keywords and the symbols are zipped with a
- -- unicode representation, which are needed for the parsing.
-
- -- Reserved keywords -> [("int", "R_...")]
- reservedWordsVar :: [String]
- reservedWordsVar = reservedWords cf
-
- reservedWordsEnv :: [(String, String)]
- reservedWordsEnv =
- zip reservedWordsVar (map (("R" ++) . toOrd) reservedWordsVar)
-
- -- Symbols -> [("+", "S_43")]
- literalsVar :: [String]
- literalsVar = literals cf
-
- strOps :: [String]
- strOps = map fst (cfTokens cf)
-
- strOpsFiltered = filterOut strOps reservedWordsVar
- strOpsFilteredSymbols = map (("S" ++) . toOrd) strOpsFiltered
-
- strOpsAndSymbols :: [(String, String)]
- strOpsAndSymbols = zip strOpsFiltered strOpsFilteredSymbols
-
- presentSymbols :: [String]
- presentSymbols =
- map addCitationSigns (strOpsFilteredSymbols ++ literalsVar)
-
- -- Defining the variables for the lexer.
- plyTokens =
- "tokens = reserved + (" ++ concat (map (++ ",") presentSymbols) ++ ")"
- plyTokensRegEx = map createRegEx strOpsAndSymbols
-
- tokensPly :: [(String, String)]
- tokensPly = reservedWordsEnv ++ strOpsAndSymbols
-
- -- Comments
- (multiMatchers, singleMatchers) = comments cf
- singleComments = map createLineCommentMatcher singleMatchers
- multiComments = map createMultiLineCommentMatcher multiMatchers
-
-
--- | Creates tokens for the lexer, such as "t_S_43 = r'\+'".
-createRegEx :: (String, String) -> String
-createRegEx (s, u) = "t_" ++ u ++ " = r'" ++ concat (map escapeChar s) ++ "'"
-
-
--- | For single-line comments
-createLineCommentMatcher :: String -> String
-createLineCommentMatcher r = unlines
- [ "def t_C" ++ (toOrd r) ++ "(t):"
- , "\tr'" ++ concat (map escapeChar r) ++ ".*'"
- , "\tpass"
- ]
-
-
--- | For multi-line comments
-createMultiLineCommentMatcher :: (String, String) -> String
-createMultiLineCommentMatcher (s, e) = unlines
- [ "def t_C" ++ (toOrd (s ++ e)) ++ "(t):"
- , "\tr'" ++ (escaped s) ++ "([\\s\\S]*?)" ++ (escaped e) ++ "'"
- , "\tpass"
- ]
- where
- escaped s = concat $ map escapeChar s
-
-
--- | The reserved_map contains mappings for reserved keywords,
--- such as 'int' : 'R_105_110_116'.
-createReservedMap :: [(String, String)] -> String
-createReservedMap xs = unlines
- [ "reserved_map = {"
- , unlines rows
- , "}"
- , ""
- , "reserved = ("
- , unlines rowsSnd
- , ")"
- ]
- where
- rows :: [String]
- rows = ["\t'" ++ w ++ "' : '" ++ u ++ "'," | (w, u) <- xs]
-
- rowsSnd = ["\t'" ++ u ++ "'," | (_, u) <- xs]
-
-
--- | Creates lexer definitions for the lexer which are interpreted using
--- the inspect module to retrieve useful information, for example:
--- def t_String(t):
--- r'"[^"]+"'
--- t.type = reserved_map.get(t.value, ’String’)
--- return t
-plyLiterals :: CF -> String
-plyLiterals cf = unlines $ concat
- [
- ifC catString [createLexFunc "String" "\"(\\\\\"|[^\"])*\""]
- , ifC catChar
- [createLexFunc "Char" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLexFunc "Double" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLexFunc "Integer" "\\d+"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
- , userDefTokens
- , ifC catIdent [createLexFunc "Ident" "[A-Za-z]\\w*"]
- -- If there is no Ident present, we need a lexer definition for reserved
- -- words:
- , if not (isUsedCat cf (TokenCat catIdent)) && length (reservedWords cf) > 0
- then [createLexFunc "" "[A-Za-z]\\w*"]
- else []
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createLexFunc name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
- ]
-
-
--- | Creates a Lexing definition for a Literal
--- If no Literal name is used, this is just a reserved_map lookup.
-createLexFunc :: String -> String -> String
-createLexFunc name regex = unlines
- [ "def t_" ++ (if name /= "" then name else "_NoIdentPresent") ++ "(t):"
- , "\tr'" ++ regex ++ "'"
- , if name /= ""
- then "\tt.type = reserved_map.get(t.value, '" ++ name ++ "')"
- else "\tt.type = reserved_map.get(t.value)"
- , "\treturn t"
- ]
-
-
--- | Adds lexer definitions to ignore whitespaces, and a testing block
--- which attempts tokenize some input, like: python3 LexTokens.py < input
-footer :: String
-footer = unlines
- [ "# Ignored characters:"
- , "t_ignore = ' \\t'"
- , ""
- , "# Ignored token with an action associated with it:"
- , "def t_ignore_newline(t):"
- , "\tr'\\n+'"
- , "\tt.lexer.lineno += t.value.count('\\n')"
- , ""
- , "# Error handler for illegal characters:"
- , "def t_error(t):"
- , "\tprint('Illegal character', 'line', str(t.lineno) + ':', t.value[0], 'ascii:', ord(t.value[0]))"
- , "\tquit()"
- , ""
- , "if __name__ == \"__main__\":"
- , "\tlexer = lex.lex()"
- , "\tlex.runmain(lexer)"
- ]
-
-
From 283dfd833168260c5512cd86f085583456a80904 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Tue, 5 Nov 2024 16:09:08 +0100
Subject: [PATCH 6/8] Retargeted to Lark
---
docs/user_guide.rst | 55 ++++++-------
document/BNF_Converter_Python_Mode.html | 68 ++++++-----------
source/src/BNFC/Backend/Python.hs | 85 +++++++++++----------
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 10 +--
4 files changed, 98 insertions(+), 120 deletions(-)
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 4e517da9..c4e1e287 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -289,10 +289,9 @@ Python Backend
===============
The BNF Converter's Python Backend generates a Python frontend, that uses
-`PLY `_ (Python Lex Yacc), to parse
-input into an abstract syntax tree.
+Lark to parse input into an AST (abstract syntax tree).
-Python 3.10 or higher is needed.
+Lark and Python 3.10 or higher are needed.
Example usage: ::
@@ -307,10 +306,8 @@ Example usage: ::
- Description
* - bnfcPyGenCalc/Absyn.py
- Provides the classes for the abstract syntax.
- * - bnfcPyGenCalc/LexTokens.py
- - Provides PLY with the information needed to build the lexer.
* - bnfcPyGenCalc/ParserDefs.py
- - Provides PLY with the information needed to build the parser.
+ - Provides Lark with the information needed to build the lexer and parser.
* - bnfcPyGenCalc/PrettyPrinter.py
- Provides printing for both the AST and the linearized tree.
* - genTest.py
@@ -318,7 +315,7 @@ Example usage: ::
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
-Optionally one may with ``-m`` also create a makefile that contains the target
+Optionally, with ``-m``, one may also create a makefile that contains the target
"distclean" to remove the generated files.
Testing the frontend
@@ -340,34 +337,28 @@ and it's possible to just use an argument::
Caveats
.......
-Presentation of conflicts in a grammar:
-
- A symbol-to-unicode transformation is made for the terminals in the grammar,
- for example from "++" to "S_43_43". This however obfuscates PLYs generated
- information of the grammar in the "parser.out" file. Users are hence
- encouraged to use the Haskell backend to debug grammars and identify
- conflicts.
-
Several entrypoints:
+ The test file genTest.py uses only the first entrypoint by default. To
+ use all entrypoints, set the start parameter to "start_". If the
+ entrypoints cause reduce/reduce conflicts, a Lark GrammarError will be
+ raised.
- At the top of the ParserDefs.py file an additional rule is added, that has
- every defined entrypoint as a possible production. This may create warnings
- for conflicts, as it may introduce ambiguity. Therefore the added
- parsing rule is by default removed beneath the function, with the statement
- "del p__Start", and included if the user comments out the removal of
- "p__Start".
-
-Special cases for special characters:
+Results from the parameterized tests:
+ While the Python backend generates working frontends for the example
+ grammars, the regression tests report five "failures" and six "errors".
- Using non-special characters, instead of say parentheses when defining rules,
- may not yield the expected behaviour. Using the below rule, an expression
- such as "a1+2a" can not be parsed since the a's are classified as reserved
- keywords, like "int", instead of symbols like "+"::
+Skeleton code for using lists as entrypoints:
+ Matchers for list categories, such as [Exp], are not generated in the
+ skeleton code: in a grammar with several different list categories, a user
+ may otherwise try to pattern match lists without checking what type the
+ elements have. Users are instead encouraged to use non-list entrypoints.
- _. Exp1 ::= "a" Exp "a" ;
+Using multiple separators:
+ Using multiple separators for the same category, as below, generates
+ Python functions with overlapping names, causing runtime errors::
-Results from the parameterized tests:
+ separator Exp1 "," ;
+ separator Exp1 ";" ;
- While the Python backend generates working frontends for the example
- grammars, four "failures" and six "errors" among the regression
- tests are reported.
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 8021b3e1..4ffb46f6 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -27,15 +27,15 @@ By Björn Werner
2024
The BNF Converter's Python Backend generates a Python frontend, that uses
- PLY (Python Lex Yacc), to parse input into an AST (abstract syntax tree).
+ Lark to parse input into an AST (abstract syntax tree).
BNFC on Github:
https://github.com/BNFC/bnfc
- PLY homepage:
- https://www.dabeaz.com/ply/ply.html
+ Lark on Github:
+ https://github.com/lark-parser/lark
Python 3.10 or higher is needed.
@@ -51,14 +51,11 @@
Usage
Filename: | Description: |
-
- bnfcGenNAME/LexTokens.py | Provides PLY with the information needed to build the lexer. |
-
bnfcGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
- bnfcGenNAME/ParserDefs.py | Provides PLY with the information needed to build the parser. |
+ bnfcGenNAME/ParserDefs.py | Provides Lark with the information needed to build the lexer and parser. |
bnfcGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
@@ -79,7 +76,6 @@ Testing the frontend
$ python3 genTest.py < hello.c
- Generating LALR tables
Parse Successful!
[Abstract Syntax]
@@ -92,9 +88,6 @@
Testing the frontend
return 0;
}
-
- The LALR tables are cached in a file called "parsetab.py", and a description by PLY of the grammar is stored in a file called "parser.out".
-
The Abstract Syntax Tree
The AST is built up using instances of Python classes, using the dataclass decorator, such as:
@@ -150,34 +143,15 @@
Using the skeleton file
Known issues
-
- Presentation of conflicts in a grammar:
-
-
- A symbol-to-unicode transformation is made for the terminals in the grammar,
- for example from "++" to "S_43_43". This however obfuscates PLYs generated
- information of the grammar, inside the "parser.out" file. Users are hence
- encouraged to use say the Haskell backend to debug their
- grammars and identify conflicts.
-
-
- Several entrypoints:
-
-
- At the top of the ParserDefs.py file an additional rule is added, that has
- every defined entrypoint as a possible production. This may create warnings
- for conflicts if it introduces ambiguity, and warnings for unused rules if
- the "_Start" category is not used as the entrypoint. Therefore the added
- parsing rule is by default removed beneath the function, "del p__Start",
- and included if the user comments out the removal:
-
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
skeleton code as it may confuse users if the grammar uses several different
- list categories. Users are instead encouraged to use a non-list entrypoint.
+ list categories: a user may then try to pattern match lists without
+ checking what type the elements have. Users are instead encouraged to use
+ non-list entrypoints.
The improper way to iterate over lists, as the value category is unknown:
@@ -195,16 +169,6 @@
for exp in listexp_:
...
-
- Special cases for special characters
-
-
- Using non-special characters instead of say parentheses when defining rules, may not yield the expected
- behaviour. Using the below rule, an expression such as "a1+2a" can not be parsed.
-
-
- _. Exp1 ::= "a" Exp "a" ;
-
Using multiple separators
@@ -215,4 +179,20 @@
separator Exp1 "," ;
separator Exp1 ";" ;
-
\ No newline at end of file
+
+Several entrypoints:
+
+ The test file genTest.py uses only the first entrypoint by default. To
+ use all entrypoints, set the start parameter to "start_". If the
+ entrypoints cause reduce/reduce conflicts, a Lark GrammarError will be
+ raised.
+
+
+Results from the parameterized tests:
+
+
+ While the Python backend generates working frontends for the example
+ grammars, the regression tests report five "failures" and six "errors".
+
+
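To make the entrypoints caveat concrete, a minimal sketch; the package name bnfcPyGenCalc and the imported names are assumptions, not guaranteed output::

    # Sketch only: enable all entrypoints instead of just the first one.
    from lark import Lark
    from bnfcPyGenCalc.ParsingDefs import grammar, TreeTransformer

    parser = Lark(grammar, start='start_', parser='lalr', lexer='basic',
                  transformer=TreeTransformer())
    ast = parser.parse("(1 + 2) * 3")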
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index 13568e38..87ce07bb 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -18,6 +18,7 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
import BNFC.PrettyPrint
+import Data.Char (toLower)
import qualified BNFC.Backend.Common.Makefile as Makefile
@@ -87,44 +88,50 @@ comment x = "# " ++ x
-- Produces the content for the testing file, genTest.py.
pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
- [
- "import sys",
- "from " ++ pkgName ++ ".ParsingDefs import *",
- "from " ++ pkgName ++ ".PrettyPrinter import *",
- "",
- "",
- "# Suggested input options:",
- "# python3 genTest.py < sourcefile",
- "# python3 genTest.py sourcefile inputfile (i.e. for interpreters).",
- "inputFile = None",
- "if len(sys.argv) > 1:",
- "\tf = open(sys.argv[1], 'r')",
- "\tinp = f.read()",
- "\tf.close()",
- "\tif len(sys.argv) > 2:",
- "\t\tinputFile = sys.argv[2]",
- "else:",
- "\tinp = ''",
- "\tfor line in sys.stdin:",
- "\t\tinp += line",
- "",
- "def onError(e):",
- " print(e)",
- " print('Parse failed')",
- " quit(1)",
- "",
- "# By default the first entrypoint is used. See ParsingDefs.py for alternatives.",
- "ast = parser.parse(inp, on_error=onError)",
- "if ast: # and not lexer.syntaxError:",
- "\tprint('Parse Successful!\\n')",
- "\tprint('[Abstract Syntax]')",
- "\tprint(printAST(ast))",
- "\tprint('\\n[Linearized Tree]')",
- "\tlinTree = lin(ast)",
- "\tprint(renderC(linTree))",
- "\tprint()",
- "else:",
- "\tprint('Parse failed')",
- "\tquit(1)"
+ [ "import sys"
+ , "from " ++ pkgName ++ ".ParsingDefs import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , ""
+ , "# Suggested input options:"
+ , "# python3 genTest.py < sourcefile"
+ , "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)."
+ , "inputFile = None"
+ , "if len(sys.argv) > 1:"
+ , "\tf = open(sys.argv[1], 'r')"
+ , "\tinp = f.read()"
+ , "\tf.close()"
+ , "\tif len(sys.argv) > 2:"
+ , "\t\tinputFile = sys.argv[2]"
+ , "else:"
+ , "\tinp = ''"
+ , "\tfor line in sys.stdin:"
+ , "\t\tinp += line"
+ , ""
+ , "def onError(e):"
+ , " print(e)"
+ , " print('Parse failed')"
+ , " quit(1)"
+ , ""
+ , "# Creates the Lark parser with the given grammar. By default to the first"
+ , "# entrypoint. Other entrypoints exist in ParsingDefs.py."
+ , "parser = Lark(grammar, start='" ++ defaultEntrypoint ++ "', parser='lalr', lexer='basic', transformer=TreeTransformer())"
+ , ""
+ , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
+ , "ast = parser.parse(inp, on_error=onError)"
+ , "if ast: # and not lexer.syntaxError:"
+ , "\tprint('Parse Successful!\\n')"
+ , "\tprint('[Abstract Syntax]')"
+ , "\tprint(printAST(ast))"
+ , "\tprint('\\n[Linearized Tree]')"
+ , "\tlinTree = lin(ast)"
+ , "\tprint(renderC(linTree))"
+ , "\tprint()"
+ , "else:"
+ , "\tprint('Parse failed')"
+ , "\tquit(1)"
]
+ where
+ defaultEntrypoint = map toLower
+ ((translateToList . show . firstEntry) cf)
+
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index adad97f3..3daefee6 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -30,10 +30,6 @@ cf2PyAbs pkgName cf = ( unlines
, createGrammar cf
, createTransformer cf
, ""
- , "# Create Lark parser with the given grammar"
- , "parser = Lark(grammar, start='start', parser='lalr', lexer='basic', " ++
- "transformer=TreeTransformer())"
- , ""
]
, unlines
["from typing import List as _List"
@@ -78,7 +74,7 @@ cf2PyAbs pkgName cf = ( unlines
createGrammar :: CF -> String
createGrammar cf = unlines
[ "grammar = r\"\"\""
- , " ?start: " ++ map toLower ((translateToList .show . firstEntry) cf)
+ , " ?start_: " ++ entryOrClause
, ""
, unlines orClauses
, larkLiterals cf
@@ -100,6 +96,10 @@ createGrammar cf = unlines
singleComments = map createLineCommentMatcher singleMatchers
multiComments = map createMultiLineCommentMatcher multiMatchers
+ strListEntryPoints = map ((map toLower) . translateToList . show)
+ ((List1.toList . allEntryPoints) cf)
+ entryOrClause = intercalate "\n | " strListEntryPoints
+
-- Enumerates all (only defined relevant) rules to prevent naming overlap.
enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
From f49fa38aa4a8a3d4f544fd8d12c6d839567d9612 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Mon, 11 Nov 2024 02:18:44 +0100
Subject: [PATCH 7/8] 0 failures and 3 errors left, due to an example requiring
 1000+ recursion depth and to lexer priorities not working as expected
---
source/src/BNFC/Backend/Python.hs | 50 +++----
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 133 ++++++++----------
.../Backend/Python/CFtoPyPrettyPrinter.hs | 43 +++---
source/src/BNFC/Backend/Python/CFtoPySkele.hs | 55 ++++----
source/src/BNFC/Backend/Python/PyHelpers.hs | 56 ++++++++
5 files changed, 188 insertions(+), 149 deletions(-)
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index 87ce07bb..e0dc012d 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -18,14 +18,14 @@ import BNFC.Backend.Python.CFtoPyPrettyPrinter (cf2PyPretty)
import BNFC.Backend.Python.CFtoPySkele (cf2PySkele)
import BNFC.Backend.Python.PyHelpers
import BNFC.PrettyPrint
-import Data.Char (toLower)
+import Data.Char (toLower, isLetter)
import qualified BNFC.Backend.Common.Makefile as Makefile
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
- let pkgName = "bnfcPyGen" ++ name
+ let pkgName = "bnfcPyGen" ++ filter isLetter name
let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
@@ -47,7 +47,7 @@ makefile :: String -> Maybe String -> String -> Doc
makefile pkgName optMakefileName basename = vcat
[
Makefile.mkRule "all" []
- [ " " ]
+ [ "@echo \"Doing nothing: No compilation of the parser needed.\"" ]
, Makefile.mkRule "clean" []
[ "rm -f parser.out parsetab.py" ]
, Makefile.mkRule "distclean" [ "vclean" ] []
@@ -90,27 +90,27 @@ pyTest :: String -> CF -> String
pyTest pkgName cf = unlines
[ "import sys"
, "from " ++ pkgName ++ ".ParsingDefs import *"
- , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ , "from " ++ pkgName ++ ".PrettyPrinter import printAST, lin, renderC"
, ""
, "# Suggested input options:"
, "# python3 genTest.py < sourcefile"
, "# python3 genTest.py sourcefile inputfile (i.e. for interpreters)."
, "inputFile = None"
, "if len(sys.argv) > 1:"
- , "\tf = open(sys.argv[1], 'r')"
- , "\tinp = f.read()"
- , "\tf.close()"
- , "\tif len(sys.argv) > 2:"
- , "\t\tinputFile = sys.argv[2]"
+ , " f = open(sys.argv[1], 'r')"
+ , " inp = f.read()"
+ , " f.close()"
+ , " if len(sys.argv) > 2:"
+ , " inputFile = sys.argv[2]"
, "else:"
- , "\tinp = ''"
- , "\tfor line in sys.stdin:"
- , "\t\tinp += line"
+ , " inp = ''"
+ , " for line in sys.stdin:"
+ , " inp += line"
, ""
, "def onError(e):"
- , " print(e)"
- , " print('Parse failed')"
- , " quit(1)"
+ , " print(e)"
+ , " print('Parse failed')"
+ , " quit(1)"
, ""
, "# Creates the Lark parser with the given grammar. By default to the first"
, "# entrypoint. Other entrypoints exist in ParsingDefs.py."
@@ -118,17 +118,17 @@ pyTest pkgName cf = unlines
, ""
, "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
, "ast = parser.parse(inp, on_error=onError)"
- , "if ast: # and not lexer.syntaxError:"
- , "\tprint('Parse Successful!\\n')"
- , "\tprint('[Abstract Syntax]')"
- , "\tprint(printAST(ast))"
- , "\tprint('\\n[Linearized Tree]')"
- , "\tlinTree = lin(ast)"
- , "\tprint(renderC(linTree))"
- , "\tprint()"
+ , "if ast:"
+ , " print('Parse Successful!\\n')"
+ , " print('[Abstract Syntax]')"
+ , " print(printAST(ast))"
+ , " print('\\n[Linearized Tree]')"
+ , " linTree = lin(ast)"
+ , " print(renderC(linTree))"
+ , " print()"
, "else:"
- , "\tprint('Parse failed')"
- , "\tquit(1)"
+ , " print('Parse failed')"
+ , " quit(1)"
]
where
defaultEntrypoint = map toLower
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index 3daefee6..dd346984 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -13,7 +13,7 @@ import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
-import Data.Char (toLower, toUpper)
+import Data.Char (toLower, toUpper, isLower)
import qualified Data.List.NonEmpty as List1
@@ -29,6 +29,7 @@ cf2PyAbs pkgName cf = ( unlines
, ""
, createGrammar cf
, createTransformer cf
+ , createDefineFunctions cf
, ""
]
, unlines
@@ -59,8 +60,8 @@ cf2PyAbs pkgName cf = ( unlines
-- Note: Custom tokens are set to inherit "str".
valueCatNames = nub $
- (map (show . normCat . valCat) rulesNoListConstructors) ++
- (map (++"(str)") (tokenNames cf)) ++
+ (map (unkw . show . normCat . valCat) rulesNoListConstructors) ++
+ (map ((++ "(str)") . unkw) (tokenNames cf)) ++
[ "String(str)"
, "Char(str)"
, "Ident(str)"
@@ -90,7 +91,7 @@ createGrammar cf = unlines
enumeratedRules :: [(Int, Rul RFun)]
enumeratedRules = enumerateAllDefinedRules rs 1 []
- orClauses = map (createOrClause cf enumeratedRules) aCats
+ orClauses = map (createOrClause enumeratedRules) aCats
(multiMatchers, singleMatchers) = comments cf
singleComments = map createLineCommentMatcher singleMatchers
@@ -111,8 +112,8 @@ enumerateAllDefinedRules (r:rs) n irs
-- Creates an or clause with all rules for a given category.
-createOrClause :: CF -> [(Int, Rul RFun)] -> Cat -> String
-createOrClause cf irs c = unlines
+createOrClause :: [(Int, Rul RFun)] -> Cat -> String
+createOrClause irs c = unlines
[ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
intercalate "\n | "
(map createProdAndNameForRule catsIrs)
@@ -132,23 +133,25 @@ createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
| isOneFun r = "one" ++ (identCat . valCat) r
| isConsFun r = "cons" ++ (identCat . valCat) r
| isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
- | otherwise = "r_" ++ funName r
+ | otherwise = "r_" ++ map toLower (funName r) ++ toOrd (funName r)
--- Creates the literals for a grammar for Lark.
+-- Creates the literals for a grammar for Lark. Priority is set after the
+-- dot, as in "Name.PRIO". For literals with the same priority, it appears
+-- that Lark (with the basic lexer) prioritizes the longest regular
+-- expression, not the longest matched literal.
larkLiterals :: CF -> String
larkLiterals cf = unlines $ concat
[
- ifC catString [createLiteral "String.2" "\"(\\\\.|[^\"])*\""]
- , ifC catChar [createLiteral "Char.2" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLiteral "Double.2" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLiteral "Integer.2" "\\d+"]
+ ifC catString [createLiteral "String.1" "\"(\\\\.|[^\"])*\""]
+ , ifC catChar [createLiteral "Char.1" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
+ , ifC catDouble [createLiteral "Double.1" "\\d+\\.\\d+(e-?\\d+)?"]
+ , ifC catInteger [createLiteral "Integer.1" "\\d+"]
-- Prolog requires user defined tokens to have priority over Ident; C
-- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
+ -- "CDouble" matching "3." in 3.14.
, userDefTokens
- , ifC catIdent [createLiteral "Ident" "[A-Za-z]\\w*"]
+ , ifC catIdent [createLiteral "Ident" "[A-Za-z_]\\w*"]
]
where
ifC :: TokenCat -> [String] -> [String]
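For reference, a minimal self-contained sketch of the Lark terminal-priority syntax discussed above; illustrative only, assuming a standard lark installation::

    from lark import Lark

    # DOUBLE.1 has priority 1 (default 0), so "3.14" is lexed as one DOUBLE
    # token; with equal priorities the basic lexer appears to prefer the
    # longest regular expression rather than the longest match.
    g = r"""
        ?start: DOUBLE | ID
        DOUBLE.1: /\d+\.\d+/
        ID: /[A-Za-z_]\w*/
        %import common.WS
        %ignore WS
    """
    print(Lark(g, parser='lalr', lexer='basic').parse("3.14"))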
@@ -156,7 +159,7 @@ larkLiterals cf = unlines $ concat
userDefTokens :: [String]
userDefTokens = [
- createLiteral (name) (printRegFlex exp) | (name, exp) <- tokenPragmas cf
+ createLiteral name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
]
createLiteral :: String -> String -> String
@@ -171,7 +174,7 @@ createTransformer cf = unlines
[ "#transformer"
, "class TreeTransformer(Transformer):"
, unlines (map createRuleTransform rs)
- , unlines (map (makeDefineTransform cf) enumeratedRDs)
+ , unlines (map makeDefineTransform enumeratedRDs)
, unlines (map createListTransform listRules)
, createTokenTransformers cf
]
@@ -196,14 +199,16 @@ createTransformer cf = unlines
createRuleTransform :: Rul RFun -> String
createRuleTransform r = unlines
[ " @v_args(inline=True)"
- , " def r_" ++ map toLower (funName r) ++ "(self" ++
+ , " def r_" ++ nameWithUnicode ++ "(self" ++
concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ funName r ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
+ , " return " ++ className ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
]
where
+ nameWithUnicode = map toLower (funName r) ++ toOrd (funName r)
+ className = unkw (funName r)
sentForm = rhsRule r
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
-- Creates a transform for a list rule.
@@ -223,7 +228,7 @@ createListTransform r = unlines
sentForm = rhsRule r
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
args :: String
| isNilFun r = "[]"
@@ -241,10 +246,6 @@ createTokenTransformers cf = unlines $ concat
, ifC catChar [createTokenTransform "Char"]
, ifC catDouble [createTokenTransform "Double"]
, ifC catInteger [createTokenTransform "Integer"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14. The lexer definitions rely on the order
- -- for priority, not the length.
, userDefTokens
, ifC catIdent [createTokenTransform "Ident"]
]
@@ -254,7 +255,7 @@ createTokenTransformers cf = unlines $ concat
userDefTokens :: [String]
userDefTokens = [
- createTokenTransform name | (name, exp) <- tokenPragmas cf
+ createTokenTransform name | (name, _) <- tokenPragmas cf
]
@@ -263,7 +264,7 @@ createTokenTransform :: String -> String
createTokenTransform name = unlines
[ " @v_args(inline=True)"
, " def " ++ map toUpper name ++ "(self, token):"
- , " return " ++ name ++ "(token.value)"
+ , " return " ++ unkw name ++ "(token.value)"
]
@@ -316,40 +317,38 @@ nameCorresponds _ _ = error "Names can't be empty"
-- Creates a transformer for a rule with its corresponding define.
-makeDefineTransform ::
- CF -> (Int, Rul RFun, Define) -> String
-makeDefineTransform cf (n, defRule, defi) = unlines
+makeDefineTransform :: (Int, Rul RFun, Define) -> String
+makeDefineTransform (n, defRule, defi) = unlines
[ " @v_args(inline=True)"
, " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ expToDef env2 (defBody defi)
+ , " return d_" ++ name ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
, ""
]
where
name = (wpThing . defName) defi
sentForm = rhsRule defRule
- args = map fst (defArgs defi)
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (c, d) <- lefts nvCats]
- env2 = zip args enumeratedVars
+ enumeratedVars = [render d | (_, d) <- lefts nvCats]
-- | Converts the production of a define, called an expression, to a
-- production for the parsing definition.
-expToDef :: [(String, String)] -> Exp -> String
-expToDef env (App "(:)" _ (e:[App "[]" _ _])) = expToDef env e ++ "]"
-expToDef env (App "(:)" _ (e:[recList])) = "[" ++ expToDef env e ++ ", " ++
- expToDef env recList
+expToDef :: CF -> Exp -> String
+expToDef cf (App "(:)" _ (e:[App "[]" _ _])) = expToDef cf e ++ "]"
+expToDef cf (App "(:)" _ (e:[recList])) = "[" ++ expToDef cf e ++ ", " ++
+ expToDef cf recList
expToDef _ (App "[]" _ _) = "[]"
-expToDef env (App fName _ exps) =
- fName ++ "(" ++ addCommas (map (expToDef env) exps) ++ ")"
-expToDef env (Var s) = case lookup s env of
- Just p -> p
- Nothing -> error "Missing variable in define enviroment"
+expToDef cf (App fName _ exps)
+ | isLower (head fName) =
+ "d_" ++ fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+ | otherwise =
+ unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+expToDef _ (Var s) = unkw s
expToDef _ (LitInt i) = "Integer(" ++ show i ++ ")"
expToDef _ (LitDouble d) = "Double(" ++ show d ++ ")"
expToDef _ (LitChar s) = "Char(\"" ++ show s ++ "\")"
-expToDef _ (LitString s) = "String('" ++ show s ++ "')"
+expToDef _ (LitString s) = "String('" ++ s ++ "')"
-- A placeholder variable to store additional information, for say type
@@ -380,29 +379,6 @@ placeholderVariableClass = unlines
]
--- | Creates a parsing definition that points to all entrypoints.
-createCommonEntrypointDef :: CF -> String
-createCommonEntrypointDef cf = unlines
- [ "def p__Start(p):"
- , " '''"
- , " _Start : " ++ (translateToList . show . head) cats ++
- concat (map createCase (tail cats))
- , " '''"
- , " p[0] = p[1]"
- , ""
- , ""
- , "# Comment the below line to enable the '_Start' entrypoint (may yield"
- ++ " conflict warnings)."
- , "del p__Start"
- , ""
- ]
- where
- cats = (List1.toList . allEntryPoints) cf
-
- createCase :: Cat -> String
- createCase c = "\n | " ++ translateToList (show c)
-
-
-- | The value categories become abstract classes, for type hinting.
createValueCatClass :: String -> String
createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
@@ -412,10 +388,10 @@ createValueCatClass s = "class " ++ s ++ ":\n\tpass\n"
makePythonClass :: Rul RFun -> String
makePythonClass rule =
"@dataclass\n" ++
- "class " ++ name ++ ":\n" ++
+ "class " ++ className ++ ":\n" ++
if length cats == 0 then "\tpass\n" else classBody
where
- name = funName rule
+ className = unkw (funName rule)
sentForm = rhsRule rule
cats = lefts sentForm
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
@@ -429,7 +405,22 @@ makePythonClass rule =
-- | Creates the corresponding type hinting for some member variable.
strCatToPyTyping :: String -> String
-strCatToPyTyping s =
- if strIsList s then "_List['" ++ (tail . init) s ++ "']" else s
+strCatToPyTyping s = if strIsList s
+ then "_List['" ++ (unkw . tail . init) s ++ "']"
+ else unkw s
+
+
+-- | Creates functions for the defines.
+createDefineFunctions :: CF -> String
+createDefineFunctions cf = unlines
+ (map (createDefineFunction cf) (definitions cf))
+createDefineFunction :: CF -> Define -> String
+createDefineFunction cf d = unlines
+ [ "def d_" ++ (wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
+ , " return " ++ expToDef cf (defBody d)
+ ]
+ where
+ args = map (unkw . fst) (defArgs d)
+
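To make the Absyn.py generator above concrete, here is a hedged sketch of the output it might produce for a hypothetical rule ``EAdd. Exp ::= Exp "+" Exp`` and a define ``plus``; all names are illustrative, and the ``_ann_type`` slot mirrors the placeholder variable mentioned earlier::

    # Hypothetical sketch of generated Absyn.py content (names illustrative).
    from dataclasses import dataclass

    class Exp:  # value categories become empty classes, used for type hints
        pass

    @dataclass
    class EAdd:  # one dataclass per rule label; unkw appends "_" on keyword clash
        exp_1: Exp
        exp_2: Exp
        _ann_type: object = None  # extra annotation slot

    def d_plus(x, y):  # defines become plain "d_"-prefixed functions
        return EAdd(x, y)

    print(d_plus(Exp(), Exp()))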
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
index 20255c19..352f12c1 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -67,8 +67,8 @@ makePrintAST cf = concat
]
, if length (tokenNames cf) > 0
then unlines
- [ " case (" ++ intercalate " | " (map (++"()") (tokenNames cf))
- ++ "):"
+ [ " case (" ++ intercalate " | "
+ (map ((++ "()") . unkw) (tokenNames cf)) ++ "):"
, " return '\"' + str(ast) + '\"'"
]
else ""
@@ -76,7 +76,8 @@ makePrintAST cf = concat
, " return '[' + ', '.join([printAST(a) for a in ast]) + ']'\n"
, "\n"
, " if len(vars(ast)) > 0:\n"
- , " return '(' + ast.__class__.__name__ + ' ' + ' '.join([printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
+ , " return '(' + ast.__class__.__name__ + ' ' + ' '.join(" ++
+ "[printAST(vars(ast)[k]) for k in vars(ast) if k != '_ann_type']) + ')'\n"
, " else:\n"
, " return ast.__class__.__name__\n"
]
@@ -107,7 +108,7 @@ makeListDecon cf c = concat
]
where
name = show $ catOfList c
- listRulesForCat = rulesForCat cf c
+ listRulesForCat = [ r | r <- cfgRules cf, isParsable r, valCat r == c]
nilRule = case [r | r <- listRulesForCat, isNilFun r] of
[] -> Nothing
@@ -122,14 +123,14 @@ makeListDecon cf c = concat
-- List rules are of the form:
-- [C] ::= symbols.. C symbols.. [C]
-- The production, in Python, is concatenated recursively:
- -- symbols.. + lin(xs[0]) + symbols.. + listCDecon(xs[1:]) + symbols..
+ -- symbols.. + c(xs[0], 'C1') + symbols.. + listCDecon(xs[1:]) + symbols..
sentFormToArgs :: Int -> [Either Cat String] -> String
sentFormToArgs _ [] = "[]"
sentFormToArgs v (Right strOp:ecss) =
"['" ++ escapeChars strOp ++ "'] + " ++
sentFormToArgs v ecss
sentFormToArgs v (Left _:ecss)
- | v == 0 = "lin(xs[0]) + " ++ sentFormToArgs (v+1) ecss
+ | v == 0 = "c(xs[0], '" ++ name ++ "') + " ++ sentFormToArgs (v+1) ecss
| v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
sentFormToArgs (v+1) ecss
| otherwise = error "A list production can max have C and [C]."
@@ -165,14 +166,5 @@ makeRenderC = unlines
, " def ident(i):"
, " return ' ' * iLevel"
, ""
- , " def removeTrailingWhitespace(tot):"
- , " i = len(tot)"
- , " while i > 0:"
- , " if tot[i] == ' ':"
- , " i -= 1"
- , " else:"
- , " break"
- , ""
, " return tot[:i]"
, ""
, " def oneEmptyLine(tot):"
@@ -211,7 +204,11 @@ makeRenderC = unlines
, " case ' ':"
, " tot += s"
, " case _:"
- , " tot += s + ' '"
+ , " if s[-1] == ' ':" -- To not extend separators of spaces.
+ , " tot = tot.rstrip()"
+ , " tot += s"
+ , " else:"
+ , " tot += s + ' '"
, ""
, " return tot"
]
@@ -221,7 +218,7 @@ makeRenderC = unlines
makeCoercCompare :: CF -> String
makeCoercCompare cf = concat
[ "cdict = {\n"
- , unlines (map (\(fs, cs) -> " " ++ fs ++ " : '" ++ cs ++ "',") scs)
+ , unlines (map (\(fs, cs) -> " " ++ unkw fs ++ " : '" ++ cs ++ "',") scs)
, "}"
]
where
@@ -278,7 +275,7 @@ makeLinFunc cf = unlines
]
, ifUsedThen catString
[ " case String():"
- , " return [ast, ' ']"
+ , " return [ast]"
]
, ifUsedThen catIdent
[ " case Ident():"
@@ -350,8 +347,8 @@ makeListEntrypointCase cf c = concat
]
where
constructors = if isTokenCat c
- then [show c ++ "()"]
- else map ((++ "()") . funName)
+ then [unkw (show c) ++ "()"]
+ else map ((++ "()") . unkw . funName)
[
r | r <- rulesForNormalizedCat cf (normCat c),
not (isCoercion r),
@@ -362,7 +359,7 @@ makeListEntrypointCase cf c = concat
-- Creates a case for a user defined literal, which inherits str.
makeSkeleTokenCase :: String -> String
makeSkeleTokenCase tokenName = concat
- [ " case " ++ tokenName ++ "():\n"
+ [ " case " ++ unkw tokenName ++ "():\n"
, " return [ast]"
]
@@ -371,7 +368,7 @@ makeSkeleTokenCase tokenName = concat
-- separator- and terminator-delimiters there are.
makeSkeleRuleCase :: Rul RFun -> String
makeSkeleRuleCase rule = concat
- [ " case " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
+ [ " case " ++ unkw fName ++ "(" ++ varNamesCommad ++ "):\n"
, " # " ++ (showEcss sentForm) ++ "\n"
, " return " ++ if (length args > 0)
then (intercalate " + " args)
@@ -382,8 +379,7 @@ makeSkeleRuleCase rule = concat
sentForm = rhsRule rule
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
-
- enumeratedVarNames = [render d | (c, d) <- lefts nvCats]
+ enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
varNamesCommad = if length enumeratedVarNames > 0
then addCommas (enumeratedVarNames ++ ["_ann_type"])
@@ -406,4 +402,5 @@ ecssAndVarsToList (Left c:ecss) (s:ss)
name = show $ catOfList c
ecssAndVarsToList (Right strOp:ecss) ss =
["['" ++ escapeChars strOp ++ "']"] ++ ecssAndVarsToList ecss ss
+ecssAndVarsToList ((Left _):_) [] = error "Missing variable name"
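The concatenated strings that build printAST above can be hard to read; the following is a hedged Python mirror of its core branches (lists, objects with fields, leaves), written for illustration only and omitting the token and literal cases of the real generated code::

    from dataclasses import dataclass

    @dataclass
    class EInt:
        integer_: int

    @dataclass
    class EAdd:
        exp_1: object
        exp_2: object

    def printAST(ast) -> str:
        # Lists print as bracketed, comma-separated elements.
        if isinstance(ast, list):
            return '[' + ', '.join(printAST(a) for a in ast) + ']'
        if not hasattr(ast, '__dict__'):
            return str(ast)  # bare literals; the generated code wraps these
        # Objects with fields print as "(ClassName field1 ...)", skipping
        # the _ann_type annotation slot.
        if len(vars(ast)) > 0:
            return ('(' + ast.__class__.__name__ + ' '
                    + ' '.join(printAST(vars(ast)[k])
                               for k in vars(ast) if k != '_ann_type') + ')')
        return ast.__class__.__name__

    print(printAST(EAdd(EInt(1), EInt(2))))  # (EAdd (EInt 1) (EInt 2))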
diff --git a/source/src/BNFC/Backend/Python/CFtoPySkele.hs b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
index 5297fa02..26904764 100644
--- a/source/src/BNFC/Backend/Python/CFtoPySkele.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPySkele.hs
@@ -11,16 +11,13 @@ import Data.Char (toLower)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
import Data.Either (lefts)
+import Data.List (intercalate)
-- | Entrypoint.
cf2PySkele :: String -> CF -> String
cf2PySkele pkgName cf = unlines
- [ "from ply.lex import lex"
- , "from ply.yacc import yacc"
- , "import sys"
- , "from " ++ pkgName ++ ".LexTokens import *"
- , "from " ++ pkgName ++ ".ParsingDefs import *"
- , "from " ++ pkgName ++ ".PrettyPrinter import *"
+ ["from " ++ pkgName ++ ".Absyn import *"
+ , ""
, ""
, makeSkele cf
]
@@ -32,15 +29,16 @@ makeSkele :: CF -> String
makeSkele cf = unlines
[ "# Categories combined into one matcher"
, "def skeleMatcher(ast: object):"
- , "\tmatch ast:"
- , unlines skeleLiteralCases
- , unlines skeleTokenCases
- , unlines skeleRuleCases
- , "\t\tcase _:"
- , "\t\t\traise Exception(str(ast.__class__) + ' unmatched')"
+ , ind 1 "match ast:"
+ , intercalate "\n" skeleLiteralCases
+ , intercalate "\n" skeleTokenCases
+ , intercalate "\n" skeleRuleCases
+ , ind 2 "case _:"
+ , ind 3 "raise Exception(str(ast.__class__) + ' unmatched')"
+ , ""
, ""
, "# Categories split into their own matchers"
- , unlines matchersOnCats
+ , unlines matchersOnCats
]
where
rules =
@@ -72,12 +70,13 @@ makeSkele cf = unlines
-- Creates a matcher for some value category.
makeMatcherOnCat :: (Cat, [Rul RFun]) -> String
-makeMatcherOnCat (c, rules) = unlines
+makeMatcherOnCat (c, rules) = unlines
[ "def matcher" ++ show c ++ "(" ++ varName ++ ": " ++ show c ++ "):"
- , "\tmatch " ++ varName ++ ":"
- , unlines cases
- ,"\t\tcase _:"
- ,"\t\t\traise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ , ind 1 "match " ++ varName ++ ":"
+ , intercalate "\n" cases
+ , ind 2 "case _:"
+ , ind 3 "raise Exception(str(" ++ varName ++ ".__class__) + ' unmatched')"
+ , ""
]
where
varName = map toLower (show c) ++ "_"
@@ -88,27 +87,23 @@ makeMatcherOnCat (c, rules) = unlines
-- | Creates a case for some rule.
makeSkeleRuleCase :: Rul RFun -> String
-makeSkeleRuleCase rule = concat
- [ "\t\tcase " ++ fName ++ "(" ++ varNamesCommad ++ "):\n"
- , "\t\t\t# " ++ (showEcss sentForm) ++ "\n"
- , "\t\t\traise Exception('" ++ fName ++ " not implemented')"
+makeSkeleRuleCase rule = intercalate "\n"
+ [ ind 2 "case " ++ name ++ "(" ++ varNamesCommad ++ "):"
+ , ind 3 "# " ++ (showEcss sentForm)
+ , ind 3 "raise Exception('" ++ name ++ " not implemented')"
]
where
- funcRStr = funRule rule :: RString
- fName = wpThing funcRStr :: String
+ name = unkw (funName rule)
sentForm = rhsRule rule
-
nvCats = numVars sentForm :: [Either (Cat, Doc) String]
-
enumeratedVarNames = [render d | (_, d) <- lefts nvCats]
-
varNamesCommad = addCommas (enumeratedVarNames ++ ["_ann_type"])
-- | Creates a case for a user-defined token.
makeSkeleTokenCase :: String -> String
-makeSkeleTokenCase tokenName = concat
- [ "\t\tcase " ++ tokenName ++ "():\n"
- , "\t\t\traise Exception('not implemented')"
+makeSkeleTokenCase tokenName = intercalate "\n"
+ [ ind 2 "case " ++ unkw tokenName ++ "():"
+ , ind 3 "raise Exception('" ++ unkw tokenName ++ " not implemented')"
]
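For reference, the matchers emitted by makeMatcherOnCat and makeSkeleRuleCase look roughly like the sketch below, assuming a hypothetical Exp category with a single EAdd rule; the package name bnfcPyGenCalc and the field count are assumptions, not taken from the patch::

    from bnfcPyGenCalc.Absyn import *

    def matcherExp(exp_: Exp):
        match exp_:
            case EAdd(exp_1, exp_2, _ann_type):
                # Exp "+" Exp
                raise Exception('EAdd not implemented')
            case _:
                raise Exception(str(exp_.__class__) + ' unmatched')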
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
index 300c1d9f..f68abe13 100644
--- a/source/src/BNFC/Backend/Python/PyHelpers.hs
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -10,6 +10,12 @@ import Data.Char
import BNFC.CF
+-- Indents by four spaces
+ind :: Int -> String -> String
+ind 0 s = s
+ind n s = ind (n-1) (" " ++ s)
+
+
addCommas :: [String] -> String
addCommas ss = intercalate ", " ss
@@ -78,3 +84,53 @@ showEcss [] = ""
showEcss (Left c:ecss) = show c ++ " " ++ (showEcss ecss)
showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
+
+-- | Adds an underscore if the string overlaps with a keyword.
+unkw :: String -> String
+unkw s = if s `elem` kwListWithSoftKeywords then s ++ "_" else s
+
+
+-- Python keywords and soft keywords; names overlapping with these get an
+-- extra underscore.
+kwListWithSoftKeywords :: [String]
+kwListWithSoftKeywords =
+ [ "False"
+ , "None"
+ , "True"
+ , "and"
+ , "as"
+ , "assert"
+ , "async"
+ , "await"
+ , "break"
+ , "class"
+ , "continue"
+ , "def"
+ , "del"
+ , "elif"
+ , "else"
+ , "except"
+ , "finally"
+ , "for"
+ , "from"
+ , "global"
+ , "if"
+ , "import"
+ , "in"
+ , "is"
+ , "lambda"
+ , "nonlocal"
+ , "not"
+ , "or"
+ , "pass"
+ , "raise"
+ , "return"
+ , "try"
+ , "while"
+ , "with"
+ , "yield"
+ , "_"
+ , "case"
+ , "match"
+ , "type"
+ ]
+
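As a concrete illustration of unkw: a rule label that collides with an entry in the list above cannot be used as a Python class name directly, so the generated class gets a trailing underscore. The grammar label ``True`` below is hypothetical::

    # Sketch: a label `True. BoolLit ::= "true"` cannot become `class True:`
    # in Python, so unkw renames the generated class to True_.
    from dataclasses import dataclass

    class BoolLit:
        pass

    @dataclass
    class True_:
        pass

    print(True_())  # True_()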
From 6eafcd6bfa31ad64beab7999d8295e19274dd6b9 Mon Sep 17 00:00:00 2001
From: AiStudent
Date: Mon, 6 Jan 2025 19:21:49 +0100
Subject: [PATCH 8/8] Retargeting to ANTLRv4
---
docs/user_guide.rst | 54 ++-
document/BNF_Converter_Python_Mode.html | 76 ++--
source/BNFC.cabal | 36 +-
source/src/BNFC/Backend/Python.hs | 81 +++--
source/src/BNFC/Backend/Python/Antlr4Utils.hs | 46 +++
.../BNFC/Backend/Python/CFtoAntlr4Lexer.hs | 187 ++++++++++
.../BNFC/Backend/Python/CFtoAntlr4Parser.hs | 342 ++++++++++++++++++
source/src/BNFC/Backend/Python/CFtoPyAbs.hs | 295 +--------------
.../Backend/Python/CFtoPyPrettyPrinter.hs | 82 +++--
source/src/BNFC/Backend/Python/PyHelpers.hs | 23 +-
testing/src/ParameterizedTests.hs | 2 +-
11 files changed, 810 insertions(+), 414 deletions(-)
create mode 100644 source/src/BNFC/Backend/Python/Antlr4Utils.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
create mode 100644 source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index c4e1e287..d68784c9 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -289,13 +289,13 @@ Python Backend
===============
The BNF Converter's Python Backend generates a Python frontend, that uses
-Lark, to parse input into an AST (abstract syntax tree).
+Antlr4, to parse input into an AST (abstract syntax tree).
-Lark and Python 3.10 or higher is needed.
+The Python package antlr4, the ANTLR4 jar, and Python 3.10 or higher are needed.
Example usage: ::
- bnfc --python Calc.cf
+ bnfc --python -m Calc.cf
.. list-table:: The result is a set of files:
@@ -304,19 +304,29 @@ Example usage: ::
* - Filename
- Description
+ * - bnfcPyGenCalc/CalcLexer.g4
+ - Provides the grammar for the lexer.
+ * - bnfcPyGenCalc/CalcParser.g4
+ - Provides the grammar for the parser.
* - bnfcPyGenCalc/Absyn.py
- Provides the classes for the abstract syntax.
- * - bnfcPyGenCalc/ParserDefs.py
- - Provides Lark with the information needed to build the lexer and parser.
* - bnfcPyGenCalc/PrettyPrinter.py
- Provides printing for both the AST and the linearized tree.
* - genTest.py
- A ready test-file, that uses the generated frontend to convert input into an AST.
* - skele.py
- Provides skeleton code to deconstruct an AST, using structural pattern matching.
+ * - Makefile
+ - The makefile, which uses an Antlr jar file to produce the lexer and parser for Python.
-Optionally one may with ``-m``` also create a makefile that contains the target
-"distclean" to remove the generated files.
+Make sure the ANTLR jar is accessible from the generated makefile and
+run the makefile. For example, on Linux, one can export the following
+variable from ``.profile``:
+
+``export ANTLR="$HOME/Downloads/antlr/antlr-4.13.2-complete.jar"``
+
+Subsequently run ``make``. The generated lexer and parser are placed inside
+the folder used above.
Testing the frontend
....................
@@ -337,16 +347,15 @@ and it's possible to just use an argument::
Caveats
.......
-Several entrypoints:
- The testfile genTest.py only uses the first entrypoint used by default. To
- use all entrypoints, set the start parameter to "start_". If the
- entrypoints cause reduce/reduce conflicts, a lark GrammarError will be
- produced.
+Maximum elements for hand-made lists:
+ If one defines custom rules for lists, such as::
+
+ (:) [C] ::= 'a' C 'b' [C] 'c'
-Results from the parameterized tests:
- While the Python backend generates working frontends for the example
- grammars, five "failures" and six "errors" among the regression
- tests are reported.
+ the Python backend cannot simplify the rule into an iterative form for
+ the parser, meaning at most about 1000 elements can be parsed before a
+ maximum recursion depth error is raised (a short illustration follows
+ these caveats). Using the terminator or separator pragmas works fine.
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
@@ -355,10 +364,21 @@ Skeleton code for using lists as entrypoints:
checking what type the elements have. Users are instead encouraged to use
non-list entrypoints.
-Using multiple separators
+Several entrypoints:
+ The testfile genTest.py only uses the first entrypoint by default.
+
+Using multiple separators:
Using multiple separators for the same category, such as below, generates
Python functions with overlapping names, causing runtime errors.::
separator Exp1 "," ;
separator Exp1 ";" ;
+Results from the parameterized tests:
+ One error among the regression tests is reported: the Java BNFC example
+ grammar contains mutually left-recursive rules.
+
+Escaped characters in haskell-hcr:
+ Attempting to parse ParCore.hcr from the BNFC example grammar
+ haskell-hcr yields errors for escaped characters.
+
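To make the recursion-depth caveat above concrete: the limit comes from CPython's call stack, not from the grammar. A minimal standalone illustration, independent of the generated parser, where each list element stands for one recursive descent step::

    import sys

    def nested(n):
        # Stands in for a recursive parse/visit: one Python frame per element.
        return 0 if n == 0 else 1 + nested(n - 1)

    print(sys.getrecursionlimit())  # typically 1000 on a stock CPython
    try:
        nested(10_000)
    except RecursionError as e:
        print('RecursionError:', e)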
diff --git a/document/BNF_Converter_Python_Mode.html b/document/BNF_Converter_Python_Mode.html
index 4ffb46f6..ce939979 100644
--- a/document/BNF_Converter_Python_Mode.html
+++ b/document/BNF_Converter_Python_Mode.html
@@ -27,38 +27,42 @@ By Björn Werner
2024
The BNF Converter's Python Backend generates a Python frontend, that uses
- Lark, to parse input into an AST (abstract syntax tree).
+ Antlr4, to parse input into an AST (abstract syntax tree).
BNFC on Github:
https://github.com/BNFC/bnfc
- Lark github:
- https://github.com/lark-parser/lark
+ Antlr on Github:
+ https://github.com/antlr/antlr4
- Python 3.10 or higher is needed.
+ Requirements are: the jar file for ANTLRv4, the Python package antlr4, and
+ Python 3.10 or higher.
Usage
- bnfc --python NAME.cf
+ bnfc --python -m NAME.cf
-The result is a set of files:
+There should now exist the following files:
Filename: | Description: |
- bnfcGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+ bnfcPyGenNAME/NAMELexer.g4 | Provides the grammar for the lexer. |
- bnfcGenNAME/ParserDefs.py | Provides Lark with the information needed to build the lexer and parser. |
+ bnfcPyGenNAME/NAMEParser.g4 | Provides the grammar for the parser. |
- bnfcGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
+ bnfcPyGenNAME/Absyn.py | Provides the classes for the abstract syntax. |
+
+
+ bnfcPyGenNAME/PrettyPrinter.py | Provides printing for both the AST and the linearized tree. |
genTest.py | A ready test-file, that uses the generated frontend to convert input into an AST. |
@@ -67,7 +71,21 @@ Usage
skele.py | Provides skeleton code to deconstruct an AST, using structural pattern matching. |
-
+
+Make sure the ANTLR jar is accessible from the generated makefile and run the makefile. The generated lexer and parser are placed inside the folder used above.
+
+
+ For example, on Linux, export the following variable from .profile:
+
+
+export ANTLR="$HOME/Downloads/antlr/antlr-4.13.2-complete.jar"
+
+
+ After that it should be possible to run the makefile:
+
+
+ make
+
Testing the frontend
The following example uses a frontend that is generated from a C-like grammar.
@@ -143,13 +161,27 @@
Using the skeleton file
Known issues
+
+ Maximum elements for hand-made list rules:
+
+
+ If one defines custom rules for lists, such as:
+
+
+ (:) [C] ::= 'a' C 'b' [C] 'c'
+
+
+ the Python backend cannot simplify the rule into an iterative form for
+ the parser, meaning at most about 1000 elements can be parsed before a
+ maximum recursion depth error is raised. Using the terminator or separator pragmas should work fine.
+
Skeleton code for using lists as entrypoints:
Matchers for using lists, such as [Exp], are not generated in the
skeleton code as it may confuse users if the grammar uses several different
- list categories, as a user may then try to pattern match lists without
+ list categories - as a user may then try to pattern match lists without
checking what type the elements have. Users are instead encouraged to use
non-list entrypoints.
@@ -169,8 +201,12 @@
for exp in listexp_:
...
+Several entrypoints:
+
+ The testfile genTest.py only uses the first entrypoint by default.
+
- Using multiple separators
+ Using multiple separators:
Using multiple separators for the same category, such as below, generates
@@ -180,19 +216,15 @@
separator Exp1 "," ;
separator Exp1 ";" ;
-Several entrypoints:
+
+Results from the parameterized tests:
+
- The testfile genTest.py only uses the first entrypoint used by default. To
- use all entrypoints, set the start parameter to "start_". If the
- entrypoints cause reduce/reduce conflicts, a lark GrammarError will be
- produced.
+ One error among the regression tests is reported: the Java BNFC example grammar contains mutually left-recursive rules.
-Results from the parameterized tests:
+ Escaped characters in haskell-hcr:
- While the Python backend generates working frontends for the example
- grammars, five "failures" and six "errors" among the regression
- tests are reported.
+ Attempting to parse ParCore.hcr from the haskell-hcr example BNFC grammar yields an error for escaped characters.
-
diff --git a/source/BNFC.cabal b/source/BNFC.cabal
index 9280a8f3..872f28fd 100644
--- a/source/BNFC.cabal
+++ b/source/BNFC.cabal
@@ -32,9 +32,8 @@ Description:
-- Support range when build with cabal
tested-with:
- GHC == 9.10.1
- GHC == 9.8.2
- GHC == 9.6.5
+ GHC == 9.8.1
+ GHC == 9.6.3
GHC == 9.4.8
GHC == 9.2.8
GHC == 9.0.2
@@ -44,6 +43,7 @@ tested-with:
GHC == 8.4.4
GHC == 8.2.2
GHC == 8.0.2
+ GHC == 7.10.3
extra-doc-files:
README.md
@@ -81,9 +81,6 @@ executable bnfc
other-modules:
-- Generated by cabal
Paths_BNFC
- autogen-modules:
- -- Generated by cabal
- Paths_BNFC
default-extensions:
-- Keep in alphabetical order.
LambdaCase
@@ -157,14 +154,6 @@ library
-- BNFC.Lex
-- -- Generated by happy
-- BNFC.Par
- -- 2023-11-03 We cannot add BNFC.{Lex,Par} as then the Lex.x and Par.y files
- -- are not bundled by cabal dist.
- -- Just make sure that there is no src/BNFC/{Lex,Par}.hs before running cabal sdist,
- -- otherwise we will end up with both Lex.hs and Lex.x (resp. Par.{hs,y})
- -- which will cause alex/happy to not be run, leading to build failures.
- autogen-modules:
- -- Generated by cabal
- Paths_BNFC
other-modules:
-- Generated by cabal
Paths_BNFC
@@ -266,6 +255,17 @@ library
BNFC.Backend.Java.RegToAntlrLexer
BNFC.Backend.Java.Utils
+ -- Python backend
+ BNFC.Backend.Python
+ BNFC.Backend.Python.CFtoPyAbs
+ BNFC.Backend.Python.CFtoPyPrettyPrinter
+ BNFC.Backend.Python.RegToFlex
+ BNFC.Backend.Python.PyHelpers
+ BNFC.Backend.Python.CFtoPySkele
+ BNFC.Backend.Python.CFtoAntlr4Lexer
+ BNFC.Backend.Python.CFtoAntlr4Parser
+ BNFC.Backend.Python.Antlr4Utils
+
-- XML backend
BNFC.Backend.XML
@@ -280,14 +280,6 @@ library
BNFC.Backend.TreeSitter.CFtoTreeSitter
BNFC.Backend.TreeSitter.RegToJSReg
- -- Python backend
- BNFC.Backend.Python
- BNFC.Backend.Python.CFtoPyAbs
- BNFC.Backend.Python.CFtoPyPrettyPrinter
- BNFC.Backend.Python.RegToFlex
- BNFC.Backend.Python.PyHelpers
- BNFC.Backend.Python.CFtoPySkele
-
----- Testing --------------------------------------------------------------
test-suite unit-tests
diff --git a/source/src/BNFC/Backend/Python.hs b/source/src/BNFC/Backend/Python.hs
index e0dc012d..3e0240dd 100644
--- a/source/src/BNFC/Backend/Python.hs
+++ b/source/src/BNFC/Backend/Python.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
-
{-
BNF Converter: Python main file
Copyright (C) 2004 Author: Bjorn Werner
-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
module BNFC.Backend.Python (makePython) where
import Prelude hiding ((<>))
@@ -21,45 +21,54 @@ import BNFC.PrettyPrint
import Data.Char (toLower, isLetter)
import qualified BNFC.Backend.Common.Makefile as Makefile
+import BNFC.Backend.Python.CFtoAntlr4Lexer (cf2AntlrLex)
+import BNFC.Backend.Python.CFtoAntlr4Parser (cf2AntlrParse)
+
-- | Entrypoint for BNFC to use the Python backend.
makePython :: SharedOptions -> CF -> MkFiles ()
makePython opts cf = do
- let pkgName = "bnfcPyGen" ++ filter isLetter name
- let (parsingDefs, abstractClasses) = cf2PyAbs pkgName cf
+ let pkgName = "bnfcPyGen" ++ filteredName
+ let abstractClasses = cf2PyAbs cf
let prettyPrinter = cf2PyPretty pkgName cf
let skeletonCode = cf2PySkele pkgName cf
- mkPyFile (pkgName ++ "/ParsingDefs.py") parsingDefs
mkPyFile (pkgName ++ "/Absyn.py") abstractClasses
mkPyFile (pkgName ++ "/PrettyPrinter.py") prettyPrinter
mkPyFile "skele.py" skeletonCode
- mkPyFile "genTest.py" (pyTest pkgName cf)
- Makefile.mkMakefile (optMake opts) $ makefile pkgName (optMake opts)
+ mkPyFile "genTest.py" (pyTest pkgName filteredName cf)
+ Makefile.mkMakefile (optMake opts) $ makefile pkgName filteredName (optMake opts)
+
+ let (d, kwenv) = cf2AntlrLex filteredName cf
+ mkAntlrFile (pkgName ++ "/" ++ filteredName ++ "Lexer.g4") d
+ --cf2AntlrParse :: String -> String -> CF -> KeywordEnv -> String
+ let p = cf2AntlrParse filteredName (pkgName ++ ".Absyn") cf kwenv
+ mkAntlrFile (pkgName ++ "/" ++ filteredName ++ "Parser.g4") p
where
name :: String
name = lang opts
+ filteredName = filter isLetter name
mkPyFile x = mkfile x comment
-
+ mkAntlrFile x = mkfile x ("//" ++) -- "//" for comments
--- | A makefile with distclean and clean specifically for the testsuite. No
--- "all" is needed as bnfc has already generated the necessary Python files.
+-- | A makefile for the testsuite. The "all" target runs the ANTLR jar to
+-- generate the Python lexer and parser from the .g4 grammar files.
-makefile :: String -> Maybe String -> String -> Doc
-makefile pkgName optMakefileName basename = vcat
+makefile :: String -> String -> Maybe String -> String -> Doc
+makefile pkgName filteredName optMakefileName basename = vcat
[
Makefile.mkRule "all" []
- [ "@echo \"Doing nothing: No compilation of the parser needed.\"" ]
+ [ "java -jar $(ANTLR) -Dlanguage=Python3 " ++ pkgName ++ "/" ++ filteredName ++ "Lexer.g4"
+ , "java -jar $(ANTLR) -Dlanguage=Python3 " ++ pkgName ++ "/" ++ filteredName ++ "Parser.g4" ]
, Makefile.mkRule "clean" []
[ "rm -f parser.out parsetab.py" ]
, Makefile.mkRule "distclean" [ "vclean" ] []
, Makefile.mkRule "vclean" []
[ "rm -f " ++ unwords
[
- pkgName ++ "/ParsingDefs.py",
pkgName ++ "/Absyn.py",
pkgName ++ "/PrettyPrinter.py",
- pkgName ++ "/ParsingDefs.py.bak",
pkgName ++ "/Absyn.py.bak",
pkgName ++ "/PrettyPrinter.py.bak",
+ pkgName ++ "/" ++ filteredName ++ "*",
"skele.py",
"genTest.py",
"skele.py.bak",
@@ -86,11 +95,14 @@ comment x = "# " ++ x
-- Produces the content for the testing file, genTest.py.
-pyTest :: String -> CF -> String
-pyTest pkgName cf = unlines
+pyTest :: String -> String -> CF -> String
+pyTest pkgName filteredName cf = unlines
[ "import sys"
- , "from " ++ pkgName ++ ".ParsingDefs import *"
, "from " ++ pkgName ++ ".PrettyPrinter import printAST, lin, renderC"
+ , "from antlr4 import *"
+ , "from " ++ pkgName ++ "." ++ lexerName ++ " import " ++ lexerName
+ , "from " ++ pkgName ++ "." ++ parserName ++ " import " ++ parserName
+ , "from antlr4.error.ErrorListener import ErrorListener"
, ""
, "# Suggested input options:"
, "# python3 genTest.py < sourcefile"
@@ -107,18 +119,28 @@ pyTest pkgName cf = unlines
, " for line in sys.stdin:"
, " inp += line"
, ""
- , "def onError(e):"
- , " print(e)"
- , " print('Parse failed')"
- , " quit(1)"
+ , "class ThrowingErrorListener(ErrorListener):"
+ , " def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):"
+ , " raise Exception(f'Syntax error at line {line}, column {column}: {msg}')"
, ""
- , "# Creates the Lark parser with the given grammar. By default to the first"
- , "# entrypoint. Other entrypoints exist in ParsingDefs.py."
- , "parser = Lark(grammar, start='" ++ defaultEntrypoint ++ "', parser='lalr', lexer='basic', transformer=TreeTransformer())"
+ , "try:"
+ , " lexer = " ++ lexerName ++ "(InputStream(inp))"
+ , " lexer.removeErrorListeners()"
+ , " lexer.addErrorListener(ThrowingErrorListener())"
+ , ""
+ , " stream = CommonTokenStream(lexer)"
+ , " parser = " ++ parserName ++ "(stream)"
+ , " parser.removeErrorListeners()"
+ , " parser.addErrorListener(ThrowingErrorListener())"
+ , ""
+ , " tree = parser.start_" ++ defaultEntrypoint ++ "()"
+ , " ast = tree.result"
+ , " error = False"
+ , "except Exception as e:"
+ , " print(e)"
+ , " error = True"
, ""
- , "# By default the first entrypoint is used. See ParsingDefs.py for alternatives."
- , "ast = parser.parse(inp, on_error=onError)"
- , "if ast:"
+ , "if not error and ast:"
, " print('Parse Successful!\\n')"
, " print('[Abstract Syntax]')"
, " print(printAST(ast))"
@@ -131,7 +153,8 @@ pyTest pkgName cf = unlines
, " quit(1)"
]
where
- defaultEntrypoint = map toLower
- ((translateToList . show . firstEntry) cf)
+ lexerName = filteredName ++ "Lexer"
+ parserName = filteredName ++ "Parser"
+ defaultEntrypoint = (translateToList . show . firstEntry) cf
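The genTest.py that pyTest emits boils down to the standard ANTLR4 Python runtime workflow. A hedged standalone sketch for a grammar named Calc follows; the package name bnfcPyGenCalc and the entrypoint rule start_Exp are assumptions tied to that example grammar, and the .g4 files must already have been compiled via ``make``::

    from antlr4 import InputStream, CommonTokenStream
    from antlr4.error.ErrorListener import ErrorListener
    from bnfcPyGenCalc.CalcLexer import CalcLexer
    from bnfcPyGenCalc.CalcParser import CalcParser
    from bnfcPyGenCalc.PrettyPrinter import printAST, lin, renderC

    class ThrowingErrorListener(ErrorListener):
        # ANTLR's default listeners only print; raising makes errors fatal.
        def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
            raise Exception(f'Syntax error at line {line}, column {column}: {msg}')

    lexer = CalcLexer(InputStream('(1 + 2) * 3'))
    lexer.removeErrorListeners()
    lexer.addErrorListener(ThrowingErrorListener())
    parser = CalcParser(CommonTokenStream(lexer))
    parser.removeErrorListeners()
    parser.addErrorListener(ThrowingErrorListener())
    tree = parser.start_Exp()         # generated start rule, first entrypoint
    print(printAST(tree.result))      # abstract syntax
    print(renderC(lin(tree.result)))  # linearized concrete syntax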
diff --git a/source/src/BNFC/Backend/Python/Antlr4Utils.hs b/source/src/BNFC/Backend/Python/Antlr4Utils.hs
new file mode 100644
index 00000000..33fea7ef
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/Antlr4Utils.hs
@@ -0,0 +1,46 @@
+{-
+ Description : Copied from the Java backend and modified for use with Python.
+ Modified by : Björn Werner
+-}
+
+module BNFC.Backend.Python.Antlr4Utils (getRuleName, getLabelName, startSymbol,
+ comment)
+ where
+
+import BNFC.CF
+import BNFC.Utils (mkName, NameStyle(..))
+import BNFC.Backend.Python.PyHelpers (pythonReserved)
+
+
+-- | Make an Antlr grammar file line comment
+comment :: String -> String
+comment = ("// " ++)
+
+
+-- Python keywords plus Antlr4 reserved keywords
+pythonAntlrReserved :: [String]
+pythonAntlrReserved = pythonReserved ++
+ [ "catch"
+ , "grammar"
+ , "throws"
+ ]
+
+
+-- | Appends an underscore if there is a clash with a Python or ANTLR keyword.
+-- E.g. "Grammar" clashes with ANTLR keyword "grammar" since
+-- we sometimes need the upper and sometimes the lower case version
+-- of "Grammar" in the generated parser.
+getRuleName :: String -> String
+getRuleName z
+ -- | firstLowerCase z `elem` ("grammar" : pythonReserved) = z ++ "_"
+ | z `elem` pythonAntlrReserved = z ++ "_"
+ | otherwise = z
+
+
+getLabelName :: Fun -> String
+getLabelName = mkName ["Rule"] CamelCase
+
+
+-- | Make a new entrypoint NT for an existing NT.
+startSymbol :: String -> String
+startSymbol = ("Start_" ++)
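For intuition, getRuleName behaves like the Python mirror below; the reserved set shown is a small illustrative subset of pythonReserved plus the three ANTLR words, not the full list::

    PYTHON_ANTLR_RESERVED = {
        'class', 'def', 'match', 'case',  # illustrative subset of pythonReserved
        'catch', 'grammar', 'throws',     # ANTLR4 reserved words
    }

    def get_rule_name(name: str) -> str:
        # Append an underscore on a clash, otherwise leave the name alone.
        return name + '_' if name in PYTHON_ANTLR_RESERVED else name

    assert get_rule_name('grammar') == 'grammar_'
    assert get_rule_name('Expr') == 'Expr'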
diff --git a/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs b/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
new file mode 100644
index 00000000..e1d48098
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoAntlr4Lexer.hs
@@ -0,0 +1,187 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-
+ BNF Converter: Python Antlr4 Lexer generator
+ Copyright (C) 2015 Author: Gabriele Paganelli
+
+ Description : This module generates the Antlr4 input file.
+ Based on CFtoJLex15.hs
+
+ Author : Gabriele Paganelli (gapag@distruzione.org)
+ Created : 15 Oct, 2015
+
+ Edited for Python by
+ : Björn Werner
+ Modified : 30 Dec, 2024
+
+-}
+
+module BNFC.Backend.Python.CFtoAntlr4Lexer ( cf2AntlrLex ) where
+
+import Prelude hiding ((<>))
+
+import Text.PrettyPrint
+import BNFC.CF
+import BNFC.Backend.Java.RegToAntlrLexer
+import BNFC.Backend.Common.NamedVariables
+import BNFC.Backend.Python.Antlr4Utils (getRuleName)
+
+
+-- | Creates a lexer grammar.
+-- Since antlr token identifiers must start with an uppercase symbol,
+-- "Surrogate_id_SYMB_" is prepended to each such identifier.
+-- This introduces a risk of clashes if somebody uses the same identifier for
+-- user defined tokens; this is not handled.
+-- Returns the environment because the parser uses it.
+cf2AntlrLex :: String -> CF -> (Doc, KeywordEnv)
+cf2AntlrLex lang cf = (,env) $ vcat
+ [ prelude lang
+ , cMacros
+ -- unnamed symbols (those in quotes, not in token definitions)
+ , lexSymbols env
+ , restOfLexerGrammar cf
+ ]
+ where
+ env = zip (cfgSymbols cf ++ reservedWords cf) $
+ map (("Surrogate_id_SYMB_" ++) . show) [0 :: Int ..]
+
+
+-- | File prelude
+prelude :: String -> Doc
+prelude lang = vcat
+ [ "// Lexer definition for use with Antlr4"
+ , "lexer grammar" <+> text lang <> "Lexer;"
+ ]
+
+
+-- For now all categories are included.
+-- Ideally only the ones that are used should be generated.
+cMacros :: Doc
+cMacros = vcat
+ [ "// Predefined regular expressions in BNFC"
+ , frg "LETTER : CAPITAL | SMALL"
+ , frg "CAPITAL : [A-Z\\u00C0-\\u00D6\\u00D8-\\u00DE]"
+ , frg "SMALL : [a-z\\u00DF-\\u00F6\\u00F8-\\u00FF]"
+ , frg "DIGIT : [0-9]"
+ ]
+ where frg a = "fragment" <+> a <+> ";"
+
+
+escapeChars :: String -> String
+escapeChars = concatMap escapeCharInSingleQuotes
+
+
+-- |
+-- >>> lexSymbols [("foo","bar")]
+-- bar : 'foo' ;
+-- >>> lexSymbols [("\\","bar")]
+-- bar : '\\' ;
+-- >>> lexSymbols [("/","bar")]
+-- bar : '/' ;
+-- >>> lexSymbols [("~","bar")]
+-- bar : '~' ;
+lexSymbols :: KeywordEnv -> Doc
+lexSymbols ss = vcat $ map transSym ss
+ where
+ transSym (s,r) = text r <> " : '" <> text (escapeChars s) <> "' ;"
+
+
+-- | Writes rules for user defined tokens, and, if used, the predefined
+-- BNFC tokens.
+restOfLexerGrammar :: CF -> Doc
+restOfLexerGrammar cf = vcat
+ [ lexComments (comments cf)
+ , ""
+ , userDefTokens
+ , ifString strdec
+ , ifChar chardec
+ , ifC catDouble
+ [ "// Double predefined token type"
+ , "DOUBLE : DIGIT+ '.' DIGIT+ ('e' '-'? DIGIT+)?;"
+ ]
+ , ifC catInteger
+ [ "//Integer predefined token type"
+ , "INTEGER : DIGIT+;"
+ ]
+ , ifC catIdent
+ [ "// Identifier token type"
+ , "fragment"
+ , "IDENTIFIER_FIRST : LETTER | '_';"
+ , "IDENT : IDENTIFIER_FIRST (IDENTIFIER_FIRST | DIGIT)*;"
+ ]
+ , "// Whitespace"
+ , "WS : (' ' | '\\r' | '\\t' | '\\n' | '\\f')+ -> skip;"
+ , "// Escapable sequences"
+ , "fragment"
+ , "Escapable : ('\"' | '\\\\' | 'n' | 't' | 'r' | 'f');"
+ , "ErrorToken : . ;"
+ , ifString stringmodes
+ , ifChar charmodes
+ ]
+ where
+ ifC cat s = if isUsedCat cf (TokenCat cat) then vcat s else ""
+ ifString = ifC catString
+ ifChar = ifC catChar
+ strdec = [ "// String token type"
+ , "STRING : '\"' -> more, mode(STRINGMODE);"
+ ]
+ chardec = ["CHAR : '\\'' -> more, mode(CHARMODE);"]
+ userDefTokens = vcat
+ [ text (getRuleName name) <> " : " <> text (printRegJLex exp) <> ";"
+ | (name, exp) <- tokenPragmas cf ]
+ stringmodes = [ "mode STRESCAPE;"
+ , "STRESCAPED : Escapable -> more, popMode ;"
+ , "mode STRINGMODE;"
+ , "STRINGESC : '\\\\' -> more , pushMode(STRESCAPE);"
+ , "STRINGEND : '\"' -> type(STRING), mode(DEFAULT_MODE);"
+ , "STRINGTEXT : ~[\"\\\\] -> more;"
+ ]
+ charmodes = [ "mode CHARMODE;"
+ , "CHARANY : ~['\\\\] -> more, mode(CHAREND);"
+ , "CHARESC : '\\\\' -> more, pushMode(CHAREND),pushMode(ESCAPE);"
+ , "mode ESCAPE;"
+ , "ESCAPED : (Escapable | '\\'') -> more, popMode ;"
+ , "mode CHAREND;"
+ , "CHARENDC : '\\'' -> type(CHAR), mode(DEFAULT_MODE);"
+ ]
+
+
+-- | Stores multi and single line comment rules.
+lexComments :: ([(String, String)], [String]) -> Doc
+lexComments ([],[]) = ""
+lexComments (m,s) = vcat
+ (prod "COMMENT_antlr_builtin" lexSingleComment s ++
+ prod "MULTICOMMENT_antlr_builtin" lexMultiComment m )
+ where
+ prod bg lc ty = [bg, ": ("] ++ punctuate "|" (map lc ty) ++ skiplex
+ skiplex = [") -> skip;"]
+
+
+-- | Create lexer rule for single-line comments.
+--
+-- >>> lexSingleComment "--"
+-- '--' ~[\r\n]* (('\r'? '\n')|EOF)
+--
+-- >>> lexSingleComment "\""
+-- '"' ~[\r\n]* (('\r'? '\n')|EOF)
+lexSingleComment :: String -> Doc
+lexSingleComment c =
+ "'" <>text (escapeChars c) <> "' ~[\\r\\n]* (('\\r'? '\\n')|EOF)"
+
+
+-- | Create lexer rule for multi-lines comments.
+--
+-- There might be a possible bug here if a language includes 2 multi-line
+-- comments. They could possibly start a comment with one character and end it
+-- with another. However this seems rare.
+--
+-- >>> lexMultiComment ("{-", "-}")
+-- '{-' (.)*? '-}'
+--
+-- >>> lexMultiComment ("\"'", "'\"")
+-- '"\'' (.)*? '\'"'
+lexMultiComment :: (String, String) -> Doc
+lexMultiComment (b,e) = "'" <> text (escapeChars b)
+ <> "' (.)*? '"<> text (escapeChars e)
+ <> "'"
diff --git a/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs b/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
new file mode 100644
index 00000000..14357e1c
--- /dev/null
+++ b/source/src/BNFC/Backend/Python/CFtoAntlr4Parser.hs
@@ -0,0 +1,342 @@
+{-
+ BNF Converter: Antlr4 Python Generator
+ Copyright (C) 2004 Author: Markus Forsberg, Michael Pellauer,
+ Bjorn Bringert
+
+ Description : This module generates the ANTLR .g4 input file for the
+ Python backend. It follows the same basic structure
+ of CFtoHappy.
+
+ Author : Gabriele Paganelli (gapag@distruzione.org)
+ Created : 15 Oct, 2015
+
+ Edited for Python 2024 by
+ : Björn Werner
+-}
+
+{-# LANGUAGE LambdaCase #-}
+
+module BNFC.Backend.Python.CFtoAntlr4Parser ( cf2AntlrParse ) where
+
+import Data.Foldable ( toList )
+import Data.List ( intercalate )
+import Data.Maybe
+import BNFC.CF
+import BNFC.Utils ( (+++), (+.+), applyWhen )
+import BNFC.Backend.Python.Antlr4Utils
+import BNFC.Backend.Common.NamedVariables
+import BNFC.Backend.Python.PyHelpers
+import Data.Either (lefts, rights, isLeft)
+
+
+-- | A definition of a non-terminal by all its rhss,
+-- together with parse actions.
+data PDef = PDef
+ { _pdNT :: Maybe String
+ -- ^ If given, the name of the lhss. Usually computed from 'pdCat'.
+ , _pdCat :: Cat
+ -- ^ The category to parse.
+ , _pdAlts :: [(Pattern, Action, Maybe Fun)]
+ -- ^ The possible rhss with actions. If 'null', skip this 'PDef'.
+ -- Where 'Nothing', skip ANTLR rule label.
+ }
+type Rules = [PDef]
+type Pattern = String
+type Action = String
+type MetaVar = (String, Cat)
+
+
+-- | Creates the ANTLR parser grammar for this CF.
+-- The environment comes from CFtoAntlr4Lexer.
+cf2AntlrParse :: String -> String -> CF -> KeywordEnv -> String
+cf2AntlrParse lang packageAbsyn cf env = unlines $ concat
+ [ [ header
+ , tokens
+ , importAbs
+ , ""
+ -- Generate start rules [#272]
+ -- _X returns [ dX result ] : x=X EOF { $result = $x.result; }
+ , prRules packageAbsyn $ map entrypoint $ toList $ allEntryPoints cf
+ -- Generate regular rules
+ , prRules packageAbsyn $ rulesForAntlr4 packageAbsyn cf env
+ ]
+ ]
+ where
+ header :: String
+ header = unlines
+ [ "// Parser definition for use with ANTLRv4"
+ , "parser grammar" +++ lang ++ "Parser;"
+ ]
+ tokens :: String
+ tokens = unlines
+ [ "options {"
+ , " tokenVocab = " ++ lang ++ "Lexer;"
+ , "}"
+ ]
+ importAbs :: String
+ importAbs = unlines
+ [ "@parser::header {import " ++ packageAbsyn
+ , "}"
+ ]
+
+
+-- | Generate start rule to help ANTLR.
+--
+-- @start_X returns [ X result ] : x=X EOF { $result = $x.result; } # Start_X@
+--
+entrypoint :: Cat -> PDef
+entrypoint cat =
+ PDef (Just nt) cat [(pat, act, fun)]
+ where
+ nt = firstLowerCase $ startSymbol $ identCat cat
+ pat = "x=" ++ catToNT cat +++ "EOF"
+ act = "$result = $x.result;"
+ -- No ANTLR Rule label, ("Start_" ++ identCat cat) conflicts with lhs.
+ fun = Nothing
+
+
+-- | The following functions are a (relatively) straightforward translation
+-- of the ones in CFtoHappy.hs
+rulesForAntlr4 :: String -> CF -> KeywordEnv -> Rules
+rulesForAntlr4 packageAbsyn cf env = map mkOne getrules
+ where
+ getrules = ruleGroups cf
+ mkOne (cat,rules) = constructRule packageAbsyn cf env rules cat
+
+
+-- | Aids the pattern constructor for lists
+data ListType = Term | Sep | None
+ deriving Eq
+
+
+-- | For every non-terminal, we construct a set of rules. A rule is a
+-- sequence of terminals and non-terminals, and an action to be performed.
+-- Complete sets of separator or terminator rules are treated separately, as
+-- the default recursive parsing may reach the maximum recursion depth
+-- in Python. Cases of multiple sets of rules are not considered.
+constructRule :: String -> CF -> KeywordEnv -> [Rule] -> NonTerminal -> PDef
+constructRule packageAbsyn cf env rules nt
+ | termOrSep /= None = PDef Nothing nt $
+ (if oneNilFun then
+ [ (" /* empty */ "
+ , "$result=[]"
+ , Nothing
+ )
+ ]
+ else
+ []
+ ) ++
+ [ ( generateListPatterns packageAbsyn env
+ (rhsRule (head consFuns)) termOrSep oneNilFun
+ , "# actions embedded in pattern"
+ , Nothing
+ )
+ ]
+ | otherwise = PDef Nothing nt $
+ [ ( p
+ , generateAction packageAbsyn nt (funRule r) m b
+ , Nothing -- labels not needed for BNFC-generated AST parser
+ )
+ | (index, r0) <- zip [1..] rules
+ , let b = isConsFun (funRule r0) && elem (valCat r0) (cfgReversibleCats cf)
+ , let r = applyWhen b revSepListRule r0
+ , let (p,m0) = generatePatterns index env r
+ , let m = applyWhen b reverse m0
+ ]
+ where
+ -- Figures out if the rules are well formed list rules (using the
+ -- separator or terminator pragmas).
+ nilFuns = filter isNilFun rules
+ oneFuns = filter isOneFun rules
+ consFuns = filter isConsFun rules
+
+ noNilFuns = length nilFuns == 0
+ noOneFuns = length oneFuns == 0
+
+ oneNilFun = length nilFuns == 1
+ oneOneFun = length oneFuns == 1
+ oneConsFun = length consFuns == 1
+
+ onlyMiddle :: [Either Cat String] -> Bool
+ onlyMiddle ecs = all isLeft [head ecs, last ecs]
+
+ noStrings :: [Either Cat String] -> Bool
+ noStrings ecs = length (rights ecs) == 0
+
+ -- Terminator:
+ -- []
+ -- (:) C ... [C]
+ isTerminator = oneNilFun && noOneFuns && oneConsFun &&
+ (noStrings . rhsRule . head) nilFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ -- Terminator nonempty:
+ -- (:[]) C ...
+ -- (:) C ... [C]
+ isTerminatorNonempty = noNilFuns && oneOneFun && oneConsFun &&
+ (isLeft . head . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns &&
+ (rights . rhsRule . head) oneFuns == ((rights . rhsRule . head) consFuns)
+
+ -- Separator:
+ -- []
+ -- (:[]) C
+ -- (:) C ... [C]
+ isSeparator = oneNilFun && oneOneFun && oneConsFun &&
+ (noStrings . rhsRule . head) nilFuns &&
+ (noStrings . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ -- Separator nonempty:
+ -- (:[]) C
+ -- (:) C ... [C]
+ isSeparatorNonempty = noNilFuns && oneOneFun && oneConsFun &&
+ (noStrings . rhsRule . head) oneFuns &&
+ (onlyMiddle . rhsRule . head) consFuns
+
+ termOrSep
+ | isTerminator || isTerminatorNonempty = Term
+ | isSeparator || isSeparatorNonempty = Sep
+ | otherwise = None
+
+
+-- | Generates a string containing the semantic action.
+generateAction :: IsFun f => String -> NonTerminal -> f -> [MetaVar]
+ -> Bool -- ^ Whether the list should be reversed or not.
+ -- Only used if this is a list rule.
+ -> Action
+generateAction packageAbsyn _ f ms rev
+ | isNilFun f = "$result =" ++ c ++ ";"
+ | isOneFun f = "$result =" ++ c ++ "; $result.append(" ++ p_1 ++ ")"
+ | isConsFun f = "$result =" ++ p_2 ++ "; $result." ++ add p_1
+ | isCoercion f = "$result = " ++ p_1 ++ ";"
+ | isDefinedRule f =
+ "$result = " ++ packageAbsyn ++ "." ++ sanitize (funName f)
+ ++ "(" ++ intercalate "," (map (resultvalue packageAbsyn) ms) ++ ");"
+ | otherwise = "$result = " ++ c
+ ++ "(" ++ intercalate "," (map (resultvalue packageAbsyn) ms) ++ ");"
+ where
+ sanitize = getRuleName
+ c = if isNilFun f || isOneFun f || isConsFun f
+ then "[]"
+ else packageAbsyn ++ "." ++ sanitize (funName f)
+ p_1 = resultvalue packageAbsyn $ ms!!0
+ p_2 = resultvalue packageAbsyn $ ms!!1
+ add p = (if rev then "append(" else "insert(0, ") ++ p ++ ")"
+
+
+-- | Gives the abstract syntax constructor for a cat.
+resultvalue :: String -> MetaVar -> String
+resultvalue packageAbsyn (n,c) = case c of
+ TokenCat "Double" -> concat [ packageAbsyn ++ ".Double(", txt, ")" ]
+ TokenCat "Integer" -> concat [ packageAbsyn ++ ".Integer(" , txt, ")" ]
+ TokenCat "Char" -> packageAbsyn ++ ".Char(" ++ txt ++ ")"
+ TokenCat "String" -> packageAbsyn ++ ".String(" ++ txt ++ ")"
+ TokenCat "Ident" -> concat [ packageAbsyn, ".Ident(", txt, ")" ]
+ TokenCat s -> packageAbsyn ++ "." ++ unkw s ++ "(" ++ txt ++ ")"
+ _ -> concat [ "$", n, ".result" ]
+ where
+ txt = '$':n +.+ "text"
+
+
+-- | Generate patterns and a set of metavariables indicating
+-- where in the pattern the non-terminals occur.
+-- >>> generatePatterns 2 [] $ npRule "myfun" (Cat "A") [] Parsable
+-- (" /* empty */ ",[])
+-- >>> generatePatterns 3 [("def", "_SYMB_1")] $ npRule "myfun" (Cat "A") [Right "def", Left (Cat "B")] Parsable
+-- ("_SYMB_1 p_3_2=b",[("p_3_2",B)])
+generatePatterns :: Int -> KeywordEnv -> Rule -> (Pattern,[MetaVar])
+generatePatterns ind env r =
+ case rhsRule r of
+ [] -> (" /* empty */ ", [])
+ its -> ( unwords $ mapMaybe (uncurry mkIt) nits
+ , [ (var i, cat) | (i, Left cat) <- nits ]
+ )
+ where
+ nits = zip [1 :: Int ..] its
+ var i = "p_" ++ show ind ++"_"++ show i
+ mkIt i = \case
+ Left c -> Just $ var i ++ "=" ++ catToNT c
+ Right s -> lookup s env
+
+
+-- | Nonempty patterns with embedded semantic actions, to match:
+-- Separator:
+-- C (... C)*
+-- Terminator:
+-- (C ...)+
+-- A terminator, for example, is turned into the pattern:
+-- {init list action} ( p_1_2=C {append action} )+
+-- Note that for separators with empty, consFun empty is a possible derivation,
+-- meaning a separator can end with delims:
+-- C (... C)* (...)?
+generateListPatterns :: String -> KeywordEnv -> [Either Cat String] ->
+ ListType -> Bool -> Pattern
+generateListPatterns packageAbsyn env ecss termOrSep oneNilFun =
+ case termOrSep of
+ Sep -> p1 ++ " {" ++ a1 ++ "} (" ++ delims ++ " " ++ p2 ++
+ " {" ++ a2 ++ "})*" ++ if oneNilFun then "(" ++ delims ++ ")?" else ""
+ Term -> "{$result=[];} (" ++ p2 ++ " " ++ delims ++ " {" ++ a2 ++ "})+"
+ None -> error "Neither Term nor Sep"
+ where
+ c = head (lefts ecss)
+ p1 = "p_1_1=" ++ catToNT c
+ p2 = "p_1_2=" ++ catToNT c
+
+ a1 = "$result = [" ++ resultvalue packageAbsyn ("p_1_1", c) ++ "]"
+ a2 = "$result.append(" ++ resultvalue packageAbsyn ("p_1_2", c) ++ ")"
+
+ delims = unwords (mapMaybe (\s -> lookup s env) (rights ecss))
+
+
+-- | Converts a cat to a string; an underscore is appended for keywords.
+catToNT :: Cat -> String
+catToNT = \case
+ TokenCat "Ident" -> "IDENT"
+ TokenCat "Integer" -> "INTEGER"
+ TokenCat "Char" -> "CHAR"
+ TokenCat "Double" -> "DOUBLE"
+ TokenCat "String" -> "STRING"
+ c | isTokenCat c -> getRuleName $ identCat c
+ | otherwise -> getRuleName $ firstLowerCase $ identCat c
+
+
+-- | Puts together the pattern and actions and returns a string containing all
+-- the rules.
+prRules :: String -> Rules -> String
+prRules packabs = concatMap $ \case
+
+ -- No rules: skip.
+ PDef _mlhs _nt [] -> ""
+
+ -- At least one rule: print!
+ PDef mlhs nt (rhs : rhss) -> unlines $ concat
+
+ -- The definition header: lhs and type.
+ [ [ unwords [ fromMaybe nt' mlhs
+ , "returns" , "[" , packabs+.+normcat , "result" , "]"
+ ]
+ ]
+ -- The first rhs.
+ , alternative " :" rhs
+ -- The other rhss.
+ , concatMap (alternative " |") rhss
+ -- The definition footer.
+ , [ " ;" ]
+ ]
+ where
+ alternative sep (p, a, label) = concat
+ [ [ concat [ sep , p ] ]
+ , [ concat [ " {" , a , "}" ] ]
+ , [ concat [ " #" , antlrRuleLabel l ] | Just l <- [label] ]
+ ]
+ catid = identCat nt
+ normcat = identCat (normCat nt)
+ nt' = getRuleName $ firstLowerCase catid
+ antlrRuleLabel :: Fun -> String
+ antlrRuleLabel fnc
+ | isNilFun fnc = catid ++ "_Empty"
+ | isOneFun fnc = catid ++ "_AppendLast"
+ | isConsFun fnc = catid ++ "_PrependFirst"
+ | isCoercion fnc = "Coercion_" ++ catid
+ | otherwise = getLabelName fnc
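Putting constructRule, generateListPatterns and prRules together: for a plain nonempty ``separator Exp ","`` (with ',' mapped to the surrogate token Surrogate_id_SYMB_0), the emitted ANTLR rule has roughly the shape below. Shown as a Python string for illustration; exact spacing, metavariable numbering, and the Absyn package path are assumptions::

    # Hypothetical output shape (names and numbering illustrative).
    list_rule = '''
    listExp returns [ bnfcPyGenCalc.Absyn.ListExp result ]
     : p_1_1=exp {$result = [$p_1_1.result]}
       (Surrogate_id_SYMB_0 p_1_2=exp {$result.append($p_1_2.result)})*
     ;
    '''
    print(list_rule)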
diff --git a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
index dd346984..6840453a 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyAbs.hs
@@ -6,33 +6,18 @@
-}
module BNFC.Backend.Python.CFtoPyAbs (cf2PyAbs) where
-import Data.List ( nub, intercalate )
+import Data.List (nub)
+import Data.Char (isLower)
+import Data.Either (lefts)
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
-import BNFC.Backend.Python.RegToFlex (printRegFlex, escapeChar)
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
-import Data.Either (lefts)
-import Data.Char (toLower, toUpper, isLower)
-import qualified Data.List.NonEmpty as List1
--- | The result is ParsingDefs.py & Absyn.py
-cf2PyAbs
- :: String
- -> CF -- ^ Grammar.
- -> (String, String) -- ParsingDefs.py, Absyn.py.
-cf2PyAbs pkgName cf = ( unlines
- [ "from lark import Lark, Transformer, v_args"
- , "from dataclasses import dataclass"
- , "from " ++ pkgName ++ ".Absyn import *"
- , ""
- , createGrammar cf
- , createTransformer cf
- , createDefineFunctions cf
- , ""
- ]
- , unlines
+-- | Produces the content for Absyn.py
+cf2PyAbs :: CF -> String
+cf2PyAbs cf = unlines
["from typing import List as _List"
,"# Value categories (no coercsions):"
, unlines valueCatsClasses
@@ -42,8 +27,9 @@ cf2PyAbs pkgName cf = ( unlines
,"# Rules:"
,"from dataclasses import dataclass, field"
,"\n" ++ (unlines dataClasses)
+ , ""
+ , createDefineFunctions cf
]
- )
where
rules = cfgRules cf
@@ -71,267 +57,6 @@ cf2PyAbs pkgName cf = ( unlines
valueCatsClasses = map createValueCatClass valueCatNames
--- Creates a grammar for Lark. Not that it is a real string (r"...").
-createGrammar :: CF -> String
-createGrammar cf = unlines
- [ "grammar = r\"\"\""
- , " ?start_: " ++ entryOrClause
- , ""
- , unlines orClauses
- , larkLiterals cf
- , unlines singleComments
- , unlines multiComments
- , " %import common.WS"
- , " %ignore WS"
- , "\"\"\""
- ]
- where
- aCats = reallyAllCats cf
- rs = cfgRules cf
-
- enumeratedRules :: [(Int, Rul RFun)]
- enumeratedRules = enumerateAllDefinedRules rs 1 []
- orClauses = map (createOrClause enumeratedRules) aCats
-
- (multiMatchers, singleMatchers) = comments cf
- singleComments = map createLineCommentMatcher singleMatchers
- multiComments = map createMultiLineCommentMatcher multiMatchers
-
- strListEntryPoints = map ((map toLower) . translateToList . show)
- ((List1.toList . allEntryPoints) cf)
- entryOrClause = intercalate "\n | " strListEntryPoints
-
-
--- Enumerates all (only defined relevant) rules to prevent naming overlap.
-enumerateAllDefinedRules :: [Rul RFun] -> Int -> [(Int, Rul RFun)]
- -> [(Int, Rul RFun)]
-enumerateAllDefinedRules [] _ irs = irs
-enumerateAllDefinedRules (r:rs) n irs
- | isDefinedRule r = enumerateAllDefinedRules rs (n+1) (irs ++ [(n, r)])
- | otherwise = enumerateAllDefinedRules rs n (irs ++ [(0, r)])
-
-
--- Creates an or clause with all rules for a given category.
-createOrClause :: [(Int, Rul RFun)] -> Cat -> String
-createOrClause irs c = unlines
- [ " ?" ++ map toLower (translateToList (show c)) ++ ": " ++
- intercalate "\n | "
- (map createProdAndNameForRule catsIrs)
- ]
- where
- catsIrs = [(n, removeWhiteSpaceSeparators r) | (n, r) <- irs,
- valCat r == c, isParsable r]
-
-
--- Creates an entry for an or clause.
-createProdAndNameForRule :: (Int, Rul RFun) -> String
-createProdAndNameForRule (n, r) = prodToDocStr (rhsRule r) ++
- if (not (isCoercion r)) then " -> " ++ map toLower name else ""
- where
- name
- | isNilFun r = "nil" ++ (identCat . valCat) r
- | isOneFun r = "one" ++ (identCat . valCat) r
- | isConsFun r = "cons" ++ (identCat . valCat) r
- | isDefinedRule r = "d" ++ show n ++ "_r_" ++ funName r
- | otherwise = "r_" ++ map toLower (funName r) ++ toOrd (funName r)
-
-
--- Creates the literals for a grammar for Lark. Priority is set after the
--- dot, such as "Name.PRIO". For literals with the same priority, it appears
--- that Lark (with basic mode) prioritizes the longest regular
--- expression, not the longest matched literal.
-larkLiterals :: CF -> String
-larkLiterals cf = unlines $ concat
- [
- ifC catString [createLiteral "String.1" "\"(\\\\.|[^\"])*\""]
- , ifC catChar [createLiteral "Char.1" "\\'(\\\\x[0-9a-f][0-9a-f]|\\\\?[\\S\\s])\\'"]
- , ifC catDouble [createLiteral "Double.1" "\\d+\\.\\d+(e-?\\d+)?"]
- , ifC catInteger [createLiteral "Integer.1" "\\d+"]
- -- Prolog requires user defined tokens to have priority over Ident; C
- -- requires Double to have priority over user defined tokens, as C has
- -- "CDouble" matching "3." in 3.14.
- , userDefTokens
- , ifC catIdent [createLiteral "Ident" "[A-Za-z_]\\w*"]
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createLiteral name (printRegFlex exp) | (name, exp) <- tokenPragmas cf
- ]
-
- createLiteral :: String -> String -> String
- createLiteral name regex =
- " " ++ map toUpper name ++ ": /" ++ regex ++ "/"
-
-
--- Creates the class transformer, where each member method tells Lark how
--- to transform some parsed node in the tree.
-createTransformer :: CF -> String
-createTransformer cf = unlines
- [ "#transformer"
- , "class TreeTransformer(Transformer):"
- , unlines (map createRuleTransform rs)
- , unlines (map makeDefineTransform enumeratedRDs)
- , unlines (map createListTransform listRules)
- , createTokenTransformers cf
- ]
- where
- enumeratedRules :: [(Int, Rul RFun)]
- enumeratedRules = enumerateAllDefinedRules (cfgRules cf) 1 []
-
- rs = [r | r <- cfgRules cf
- , not (isCoercion r)
- , not (isNilCons r)
- , not (isDefinedRule r)]
- listRules = [r | r <- cfgRules cf, isNilCons r]
-
- enumeratedRDs = [(n, r, d) | (n, r) <- enumeratedRules, d <- definitions cf
- , not (isCoercion r)
- , not (isNilCons r)
- , isDefinedRule r
- , nameCorresponds ((wpThing . defName) d) (funName r)]
-
-
--- Creates a transform for a rule
-createRuleTransform :: Rul RFun -> String
-createRuleTransform r = unlines
- [ " @v_args(inline=True)"
- , " def r_" ++ nameWithUnicode ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ className ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
- ]
- where
- nameWithUnicode = map toLower (funName r) ++ toOrd (funName r)
- className = unkw (funName r)
- sentForm = rhsRule r
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
-
--- Creates a transform for a list rule.
-createListTransform :: Rul RFun -> String
-createListTransform r = unlines
- [ " @v_args(inline=True)"
- , " def " ++ map toLower name ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return " ++ args
- ]
- where
- name
- | isNilFun r = "nil" ++ (identCat . valCat) r
- | isOneFun r = "one" ++ (identCat . valCat) r
- | isConsFun r = "cons" ++ (identCat . valCat) r
- | otherwise = funName r
-
- sentForm = rhsRule r
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
- args :: String
- args
- | isNilFun r = "[]"
- | isOneFun r = "[" ++ head enumeratedVars ++ "]"
- | isConsFun r = "[" ++ head enumeratedVars ++ "] + " ++
- last enumeratedVars
- | otherwise = error "Should be a list function"
-
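The three list transforms amount to the following, sketched for [Exp] with approximated method names (the real names are the lowered nil/one/cons names computed above):

    from lark import Transformer, v_args

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def nillistexp(self):            # [Exp] ::=
            return []

        @v_args(inline=True)
        def onelistexp(self, e):         # [Exp] ::= Exp
            return [e]

        @v_args(inline=True)
        def conslistexp(self, e, es):    # [Exp] ::= Exp "," [Exp]
            return [e] + es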
-
--- Creates the transformer functions for the tokens.
-createTokenTransformers :: CF -> String
-createTokenTransformers cf = unlines $ concat
- [
- ifC catString [createTokenTransform "String"]
- , ifC catChar [createTokenTransform "Char"]
- , ifC catDouble [createTokenTransform "Double"]
- , ifC catInteger [createTokenTransform "Integer"]
- , userDefTokens
- , ifC catIdent [createTokenTransform "Ident"]
- ]
- where
- ifC :: TokenCat -> [String] -> [String]
- ifC cat s = if isUsedCat cf (TokenCat cat) then s else []
-
- userDefTokens :: [String]
- userDefTokens = [
- createTokenTransform name | (name, _) <- tokenPragmas cf
- ]
-
-
--- Creates a transform for a token.
-createTokenTransform :: String -> String
-createTokenTransform name = unlines
- [ " @v_args(inline=True)"
- , " def " ++ map toUpper name ++ "(self, token):"
- , " return " ++ unkw name ++ "(token.value)"
- ]
-
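Each token transform wraps the matched text in its abstract-syntax class; sketched here for Integer (the class is a stand-in for the generated one):

    from lark import Transformer, v_args

    class Integer(int):                  # stand-in for the Absyn token class
        pass

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def INTEGER(self, token):
            return Integer(token.value)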
-
--- | Produces the production part of the docstring for the parsing definitions.
-prodToDocStr :: [Either Cat String] -> String
-prodToDocStr [] = ""
-prodToDocStr (ec:[]) = ecsToDocStr ec
-prodToDocStr (ec:ecs) =
- ecsToDocStr ec ++ " " ++ prodToDocStr ecs
-
-
--- Converts a single element in the production.
-ecsToDocStr :: Either Cat String -> String
-ecsToDocStr (Left (TokenCat t)) = map toUpper t
-ecsToDocStr (Left c) = map toLower (translateToList (show c))
-ecsToDocStr (Right strOp) = "\"" ++ concat (map escapeBackslash strOp) ++ "\""
-
-
--- | For single-line comments
-createLineCommentMatcher :: String -> String
-createLineCommentMatcher r = unlines
- [ " C" ++ toOrd r ++ ": /" ++ concat (map escapeChar r) ++ "[^\\n]*/"
- , " %ignore C" ++ toOrd r
- ]
-
-
--- | For multi-line comments
-createMultiLineCommentMatcher :: (String, String) -> String
-createMultiLineCommentMatcher (s, e) = unlines
- [ " C" ++ toOrd (s ++ e) ++ ": /" ++ escaped s ++ "([\\s\\S]*?)" ++
- escaped e ++ "/"
- , " %ignore C" ++ toOrd (s ++ e)
- ]
- where
- escaped s = concat $ map escapeChar s
-
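For a grammar with hypothetical C-style comments "//" and "/*" "*/", the emitted matchers would look roughly like this (47 and 42 are the character codes of '/' and '*'):

    comment_terminals = r"""
      C_47_47: /\/\/[^\n]*/
      %ignore C_47_47
      C_47_42_42_47: /\/\*([\s\S]*?)\*\//
      %ignore C_47_42_42_47
    """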
-
--- Since the grammar is embedded in a raw string, r""" ... """, we cannot
--- escape everything in strOp from regflex; only backslashes are escaped.
-escapeBackslash :: Char -> String
-escapeBackslash '\\' = "\\\\"
-escapeBackslash c = [c]
-
-
--- | Compares names for defines. The first letter is compared
--- case-insensitively, so "while" corresponds to "While".
-nameCorresponds :: String -> String -> Bool
-nameCorresponds (x:xs) (y:ys) = (toLower x == toLower y) && (xs == ys)
-nameCorresponds _ _ = error "Names can't be empty"
-
-
--- Creates a transformer for a rule with its corresponding define.
-makeDefineTransform :: (Int, Rul RFun, Define) -> String
-makeDefineTransform (n, defRule, defi) = unlines
- [ " @v_args(inline=True)"
- , " def d" ++ show n ++ "_r_" ++ map toLower name ++ "(self" ++
- concat (map (", " ++) enumeratedVars) ++ "):"
- , " return d_" ++ name ++ "(" ++ intercalate ", " enumeratedVars ++ ")"
- , ""
- ]
- where
- name = (wpThing . defName) defi
- sentForm = rhsRule defRule
- nvCats = numVars sentForm :: [Either (Cat, Doc) String]
- enumeratedVars = [render d | (_, d) <- lefts nvCats]
-
-
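For a hypothetical `define twice x = EAdd x x`, enumerated as rule 1, the generated transform is roughly:

    from lark import Transformer, v_args

    def d_twice(x):                      # stand-in; the real one builds EAdd(x, x)
        return ('EAdd', x, x)

    class TreeTransformer(Transformer):
        @v_args(inline=True)
        def d1_r_twice(self, x):         # scheme: d<n>_r_<lowered define name>
            return d_twice(x)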
-- | Converts the body of a define, an expression, into a production for
-- the parsing definition.
expToDef :: CF -> Exp -> String
@@ -341,7 +66,7 @@ expToDef cf (App "(:)" _ (e:[recList])) = "[" ++ expToDef cf e ++ ", " ++
expToDef _ (App "[]" _ _) = "[]"
expToDef cf (App fName _ exps)
| isLower (head fName) =
- "d_" ++ fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
+ unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
| otherwise =
unkw fName ++ "(" ++ addCommas (map (expToDef cf) exps) ++ ")"
expToDef _ (Var s) = unkw s
@@ -418,7 +143,7 @@ createDefineFunctions cf = unlines
createDefineFunction :: CF -> Define -> String
createDefineFunction cf d = unlines
- [ "def d_" ++ (wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
+ [ "def " ++ (unkw . wpThing . defName) d ++ "(" ++ addCommas args ++ "):"
, " return " ++ expToDef cf (defBody d)
]
where
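With this change, a hypothetical `define twice x = EAdd x x` now yields a function named via unkw rather than with a d_ prefix, roughly:

    from dataclasses import dataclass

    @dataclass
    class EAdd:              # stand-in for the Absyn class
        exp_1: object
        exp_2: object

    def twice(x):            # unkw leaves 'twice' as-is; 'pass' would become 'pass_'
        return EAdd(x, x)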
diff --git a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
index 352f12c1..8464bff6 100644
--- a/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
+++ b/source/src/BNFC/Backend/Python/CFtoPyPrettyPrinter.hs
@@ -6,20 +6,22 @@
-}
module BNFC.Backend.Python.CFtoPyPrettyPrinter ( cf2PyPretty ) where
-import Data.List ( intercalate, nub )
+import Data.List ( intercalate, nub, findIndices )
import BNFC.CF
import BNFC.Backend.Python.PyHelpers
import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint (Doc, render)
-import Data.Either (lefts)
+import Data.Either (rights, lefts, isLeft)
import BNFC.Backend.Common.StrUtils
import qualified Data.List.NonEmpty as List1
+
-- | Used to create PrettyPrinter.py, that contains the functionality
-- to print the AST and the linearized tree.
cf2PyPretty :: String -> CF -> String
cf2PyPretty pkgName cf = unlines
[ "from " ++ pkgName ++ ".Absyn import *"
+ , "import itertools"
, ""
, makePrintAST cf
, ""
@@ -88,7 +90,7 @@ makePrintAST cf = concat
| otherwise = ""
--- Creates deconstructors for all list categories.
+-- | Creates deconstructors for all list categories.
makeListDecons :: CF -> String
makeListDecons cf = unlines $ map (makeListDecon cf) listCats
where
@@ -97,7 +99,7 @@ makeListDecons cf = unlines $ map (makeListDecon cf) listCats
listCats = [c | c <- valCats, isList c]
--- Creates a deconstructor for some list category.
+-- | Creates a deconstructor for some list category.
makeListDecon :: CF -> Cat -> String
makeListDecon cf c = concat
[ "def list" ++ name ++ "Decon(xs):\n"
@@ -120,10 +122,11 @@ makeListDecon cf c = concat
[] -> Nothing
rs -> Just (head rs)
- -- List rules are of the form:
- -- [C] ::= symbols.. C symbols.. [C]
- -- The production, in Python, is concatenated recursively:
- -- symbols.. + c(xs[0], 'C1') + symbols.. + listCDecon(xs[1:]) + symbols..
+ noOneFun = case oneRule of
+ Nothing -> True
+ _ -> False
+
+ -- Builds the production recursively (used for the nil and one rules)
sentFormToArgs :: Int -> [Either Cat String] -> String
sentFormToArgs _ [] = "[]"
sentFormToArgs v (Right strOp:ecss) =
@@ -131,7 +134,7 @@ makeListDecon cf c = concat
sentFormToArgs v ecss
sentFormToArgs v (Left _:ecss)
| v == 0 = "c(xs[0], '" ++ name ++ "') + " ++ sentFormToArgs (v+1) ecss
- | v == 1 = "list" ++ name ++ "Decon(xs[1:]) + " ++
+ | v == 1 = error "Python backend error: the cons case should use the iterative approach"
sentFormToArgs (v+1) ecss
| otherwise = error "A list production can max have C and [C]."
@@ -149,9 +152,37 @@ makeListDecon cf c = concat
, " return " ++ sentFormToArgs 0 (rhsRule r)
]
+ -- Adds each element and its delimiters iteratively, from the right
consRuleStr = case consRule of
Nothing -> ""
- Just r -> " return " ++ sentFormToArgs 0 (rhsRule r) ++ "\n"
+ Just r -> unlines
+ [ " " ++ start
+ , " for x in xs[:" ++ endIndice ++ "][::-1]:"
+ , " tot += " ++ add endlims ++ "[]"
+ , " tot = " ++ add delims ++ "tot"
+ , " tot = c(x, '" ++ name ++ "') + tot"
+ , " tot = " ++ add prelims ++ "tot"
+ , " return tot"
+ ]
+ where
+ ecss = rhsRule r
+ indices = findIndices isLeft ecss
+ i1 = head indices
+ i2 = last indices
+ prelims = rights $ take i1 ecss
+ endlims = rights $ drop i2 ecss
+ delims = rights $ drop i1 $ take i2 ecss
+
+ start
+ | not noOneFun = "tot = list" ++ name ++ "Decon(xs[-1:])"
+ | otherwise = "tot = list" ++ name ++ "Decon([])"
+
+ add :: [String] -> String
+ add ss = concat $ map (\s -> "['" ++ escapeChars s ++ "'] + ") ss
+
+ endIndex
+ | not noOneFun = "-1"
+ | otherwise = ""
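The emitted deconstructor thus folds the elements in from the right instead of recursing once per element. A self-contained sketch for a hypothetical [Exp] with separator "," (c, the generated per-element linearizer, is stubbed; prelim and terminator handling are omitted):

    def c(x, cat):                       # stub for the generated linearizer
        return [str(x)]

    def listExpDecon(xs):
        if not xs:
            return []
        if len(xs) == 1:                 # the one-rule renders the last element
            return c(xs[0], 'Exp')
        tot = listExpDecon(xs[-1:])      # start from the final element
        for x in xs[:-1][::-1]:          # remaining elements, right to left
            tot = [','] + tot            # the "," delimiter
            tot = c(x, 'Exp') + tot      # the element itself
        return tot

    print(listExpDecon([1, 2, 3]))       # ['1', ',', '2', ',', '3']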
-- | Creates the renderC function, which creates a string of a list of
@@ -286,12 +317,12 @@ makeLinFunc cf = unlines
, " return [ast]"
]
]
- , " # skeleTokenCases:"
- , unlines skeleTokenCases
- , " # skeleRuleCases:"
- , unlines skeleRuleCases
- , -- Deals with cases where the entrypoint is say [Stm] or [Exp],
- -- with pattern matching on the first object in the list.
+ , " # Token cases:"
+ , unlines tokenCases
+ , " # Rule cases:"
+ , unlines ruleCases
+ , -- Deals with cases where the entrypoint is, say, [Stm] or
+ -- [Exp], with pattern matching on the first object in the list.
" case " ++ "list():"
, " if len(ast) == 0:"
, " return []"
@@ -305,7 +336,7 @@ makeLinFunc cf = unlines
, " raise Exception(str(ast.__class__) + ' unmatched')"
]
where
- -- Used to include standard literals, if needed.
+ -- To include standard literals, if needed.
ifUsedThen :: TokenCat -> [String] -> String
ifUsedThen cat ss
| isUsedCat cf (TokenCat cat) = unlines ss
@@ -319,8 +350,8 @@ makeLinFunc cf = unlines
, not (isNilCons r)
]
- skeleTokenCases = map makeSkeleTokenCase (tokenNames cf)
- skeleRuleCases = map makeSkeleRuleCase rules
+ tokenCases = map makeTokenCase (tokenNames cf)
+ ruleCases = map makeRuleCase rules
catEntrypointsForLists =
[catOfList c | c <- (List1.toList . allEntryPoints) cf, isList c]
@@ -356,9 +387,9 @@ makeListEntrypointCase cf c = concat
]
--- Creates a case for a user defined literal, which inherits str.
-makeSkeleTokenCase :: String -> String
-makeSkeleTokenCase tokenName = concat
+-- | Creates a case for a user-defined literal, which inherits from str.
+makeTokenCase :: String -> String
+makeTokenCase tokenName = concat
[ " case " ++ unkw tokenName ++ "():\n"
, " return [ast]"
]
@@ -366,12 +397,11 @@ makeSkeleTokenCase tokenName = concat
-- | Creates a case for some rule, with a comment recording which
-- separator and terminator delimiters occur in it.
-makeSkeleRuleCase :: Rul RFun -> String
-makeSkeleRuleCase rule = concat
+makeRuleCase :: Rul RFun -> String
+makeRuleCase rule = concat
[ " case " ++ unkw fName ++ "(" ++ varNamesCommad ++ "):\n"
, " # " ++ (showEcss sentForm) ++ "\n"
- , " return " ++ if (length args > 0)
- then (intercalate " + " args)
+ , " return " ++ if (length args > 0) then (intercalate " + " args)
else "[]"
]
where
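Together these produce the match cases of the linearizer. A self-contained sketch for a hypothetical token Ident and rule `EVar. Exp ::= Ident` (requires Python 3.10):

    from dataclasses import dataclass

    class Ident(str):                    # token classes inherit from str
        pass

    @dataclass
    class EVar:                          # stand-in for the Absyn class
        ident_: Ident

    def lin(ast):
        match ast:
            case Ident():                # token case
                return [ast]
            case EVar(ident_1):
                # Ident
                return lin(ident_1)
        raise Exception(str(ast.__class__) + ' unmatched')

    print(lin(EVar(Ident('x'))))         # ['x']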
diff --git a/source/src/BNFC/Backend/Python/PyHelpers.hs b/source/src/BNFC/Backend/Python/PyHelpers.hs
index f68abe13..a1258e85 100644
--- a/source/src/BNFC/Backend/Python/PyHelpers.hs
+++ b/source/src/BNFC/Backend/Python/PyHelpers.hs
@@ -87,21 +87,19 @@ showEcss (Right strOp:ecss) = "\"" ++ strOp ++ "\" " ++ (showEcss ecss)
-- | Adds an underscore if the string overlaps with a keyword.
unkw :: String -> String
-unkw s = if s `elem` kwListWithSoftKeywords then s ++ "_" else s
+unkw s = if s `elem` pythonReserved then s ++ "_" else s
--- To add an extra underscore if something overlaps with a keyword.
-kwListWithSoftKeywords :: [String]
-kwListWithSoftKeywords =
- [ "False"
- , "None"
- , "True"
- , "and"
+-- | Python keywords plus soft keywords, in alphabetical order.
+pythonReserved :: [String]
+pythonReserved =
+ [ "and"
, "as"
, "assert"
, "async"
, "await"
, "break"
+ , "case"
, "class"
, "continue"
, "def"
@@ -109,6 +107,7 @@ kwListWithSoftKeywords =
, "elif"
, "else"
, "except"
+ , "False"
, "finally"
, "for"
, "from"
@@ -118,19 +117,19 @@ kwListWithSoftKeywords =
, "in"
, "is"
, "lambda"
+ , "match"
+ , "None"
, "nonlocal"
, "not"
, "or"
, "pass"
, "raise"
, "return"
+ , "True"
, "try"
+ , "type"
, "while"
, "with"
, "yield"
, "_"
- , "case"
- , "match"
- , "type"
]
-
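The practical effect on generated names, for hypothetical grammar labels:

    # unkw "EAdd"  == "EAdd"     (no collision, unchanged)
    # unkw "match" == "match_"   (soft keyword since Python 3.10)
    class match_:                 # what Absyn.py would contain for a label "match"
        pass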
diff --git a/testing/src/ParameterizedTests.hs b/testing/src/ParameterizedTests.hs
index 8231c8eb..c5876296 100644
--- a/testing/src/ParameterizedTests.hs
+++ b/testing/src/ParameterizedTests.hs
@@ -450,7 +450,7 @@ parameters = concat
}
pythonParams = base
{ tpBuild = do
- return () -- nothing to make or compile
+ tpMake
,
tpRunTestProg = \ _lang args -> do
pyFile_ <- findFile "genTest.py"