tim-dlang
diff --git a/‎.github/workflows/tests.yml
+10-1 b/‎.github/workflows/tests.yml
+10-1
diff --git a/‎README.md
+4 b/‎README.md
+4
diff --git a/‎docs/api.md
+2 b/‎docs/api.md
+2
diff --git a/‎examples/python/README.md
+31 b/‎examples/python/README.md
+31
diff --git a/‎examples/python/dub.json
+27 b/‎examples/python/dub.json
+27
diff --git a/‎examples/python/grammarpeg.ebnf
+71 b/‎examples/python/grammarpeg.ebnf
+71
@@ -62,6 +62,15 @@ jobs:
         ref: d64aefb55228d9584d3e5b2433f720ea8fd00c82
         persist-credentials: false
 
+    - name: Checkout CPython
+      if: ${{ matrix.platform != 'win32' }}
+      uses: actions/checkout@v3
+      with:
+        path: cpython
+        repository: python/cpython
+        ref: 3979150a0d406707f6d253d7c15fb32c1e005a77
+        persist-credentials: false
+
     - name: 'Build & Test'
       run: |
-        ${{ env.DC }} -run runtests.d --compiler ${{ env.DC }} -m${{ matrix.model }} --json-test-dir JSONTestSuite/test_parsing --dmd-dir dmd --avoid-parallel-memory-usage --github ${{ matrix.extra_args }}
+        ${{ env.DC }} -run runtests.d --compiler ${{ env.DC }} -m${{ matrix.model }} --json-test-dir JSONTestSuite/test_parsing --dmd-dir dmd --python-test-dir cpython/Lib/test --avoid-parallel-memory-usage --github ${{ matrix.extra_args }}
@@ -51,6 +51,10 @@ The parser for C++ uses GLR, while the grammar for the preprocessor can
 use LALR. The example application shows the parse tree for a C++ file,
 which needs to be already preprocessed.
 
+An example for parsing Python is in the folder [examples/python/](examples/python/).
+It uses a wrapper around the generated lexer, which keeps track of the
+indentation level.
+
 The folder [tests/grammars/](tests/grammars/) also contains example grammars, but some
 of them test corner cases and should not be used as examples for
 real grammars.
 
@@ -66,6 +66,8 @@ different things, like for example:
   [lexer hack](https://en.wikipedia.org/wiki/Lexer_hack) for C.
 * Store or process comments, which are ignored by the parser.
 * Add debug output without modifying lexer or parser directly.
+* Keep track of the indentation level for languages like Python; see the
+  example in [examples/python/](../examples/python/).
 
 ## Tree Creator
 
 
@@ -0,0 +1,31 @@
+# Example grammar for Python
+
+This is an example for parsing [Python](https://www.python.org/).
+The grammar is in the file grammarpython.ebnf. The application testpython.d
+uses it to parse Python files and print a parse tree.
+
+Python uses the indentation to define the structure of the source code.
+The generated lexer does not directly implement this. Instead the
+grammar contains the tokens `Indent` and `Dedent` without a definition.
+A wrapper around the lexer in the application keeps track of the current
+indentation and generates these tokens, so the generated parser can use
+them.
+
+The application can be built with the following command:
+```sh
+dub build
+```
+
+It is also possible to test the grammar on test cases from
+https://github.com/python/cpython/tree/main/Lib/test using the argument `--test-dir`:
+```sh
+git clone https://github.com/python/cpython.git
+git -C cpython checkout 3979150a0d406707f6d253d7c15fb32c1e005a77
+./example_python --test-dir cpython/Lib/test/
+```
+It only prints output when errors occur, so the expected output is empty.
+
+The grammar is based on the official grammar at https://docs.python.org/3/reference/lexical_analysis.html
+and https://docs.python.org/3/reference/grammar.html.
+The grammar was converted with the program grammarpythongen.d, which
+uses grammarpeg.ebnf. Some manual changes were also made.
@@ -0,0 +1,27 @@
+{
+    "name": "example_python",
+    "description": "Example Python for DParserGen",
+    "authors": ["Tim Schendekehl"],
+    "license": "BSL-1.0",
+    "targetType": "executable",
+    "dependencies": {
+        "dparsergen:core": {
+            "version": "*",
+            "path": "../.."
+        },
+        "dparsergen:generator": {
+            "version": "*",
+            "path": "../.."
+        }
+    },
+    "sourceFiles": [
+        "testpython.d",
+        "grammarpython.d",
+        "grammarpython_lexer.d"
+    ],
+    "lflags-windows": ["/STACK:10485760"],
+    "preBuildCommands": [
+        "\"$DUB\" run --root=../../ :generator -- grammarpython.ebnf -o grammarpython.d --lexer grammarpython_lexer.d"
+    ],
+    "buildRequirements": ["allowWarnings"]
+}
@@ -0,0 +1,71 @@
+PEG = Definition+;
+
+Definition
+    = Name BracketExpression? Memo? ":" Newline* "|"? Productions Newline
+    | Newline
+    | "@" "trailer" StringLiteralLong Newline
+    ;
+
+Memo = "(" Expression ")";
+
+Productions @array
+    = Production
+    | Productions Newline? "|" Production
+    ;
+
+Production = NamedExpression+ Code?;
+
+NamedExpression @backtrack
+    = Name BracketExpression? "=" PrefixExpression
+    | <PrefixExpression
+    ;
+PrefixExpression
+    = "!" Expression
+    | "&" PrefixExpression
+    | "&&" PrefixExpression
+    | StringLiteral "." Expression "+"
+    | StringLiteral "..." StringLiteral
+    | <Expression
+    | "~"
+    ;
+Expression @noOptDescent
+    = Name
+    | StringLiteral
+    | ExtraText
+    | <BracketExpression
+    | Expression "*"
+    | Expression "+"
+    | Expression "?"
+    | "(" Productions ")"
+    ;
+BracketExpression
+    = "[" Productions "]"
+    ;
+
+token StringLiteralLong @minimalMatch
+    = "'''" [^]* "'''"
+    ;
+token StringLiteral
+    = "'" [^']* "'" !"'"
+    | "\"" [^"]* "\""
+    ;
+token ExtraText
+    = "<" [^<>]* ">"
+    ;
+
+token Code = "{" {[^{}"'] | StringLiteral}* "}";
+
+token Name @lowPrio
+    = [a-zA-Z_] [a-zA-Z_0-9]*
+    | "`" [a-zA-Z_] [a-zA-Z_0-9]* "`"
+    ;
+
+token Space @ignoreToken
+    = [ \t]+
+    ;
+token Comment @ignoreToken
+    = "#" [^\n]*
+    ;
+token Newline
+    = "\n" | "\r" | "\r\n"
+    ;