From f024ec15c06d657fe850e68e603fed3b46bca200 Mon Sep 17 00:00:00 2001 From: Ken Domino Date: Sun, 25 Jun 2023 09:06:03 -0400 Subject: [PATCH] Removed Python2 target. Signed-off-by: Ken Domino --- .github/workflows/hosted.yml | 10 - README.md | 2 +- doc/IDEs.md | 2 +- doc/antlr-project-testing.md | 1 - doc/getting-started.md | 2 +- doc/python-target.md | 312 +++- doc/releasing-antlr.md | 15 +- doc/target-agnostic-grammars.md | 46 + doc/targets.md | 8 +- .../ParserErrors/ExtraneousInput.txt | 1 - .../DropLoopEntryBranchInLRRule_4.txt | 1 - .../PredFromAltTestedInLoopBack_1.txt | 1 - .../test/runtime/templates/Python2.test.stg | 286 --- .../v4/test/runtime/CustomDescriptors.java | 2 +- .../test/runtime/python2/Python2Runner.java | 33 - .../runtime/python2/Python2RuntimeTests.java | 17 - runtime/Python2/MANIFEST.in | 1 - runtime/Python2/README.txt | 4 - runtime/Python2/RELEASE-4.5.txt | 13 - runtime/Python2/setup.py | 14 - .../Python2/src/antlr4/BufferedTokenStream.py | 306 --- runtime/Python2/src/antlr4/CodePoints.py | 73 - .../Python2/src/antlr4/CommonTokenFactory.py | 59 - .../Python2/src/antlr4/CommonTokenStream.py | 85 - runtime/Python2/src/antlr4/FileStream.py | 32 - runtime/Python2/src/antlr4/InputStream.py | 108 -- runtime/Python2/src/antlr4/IntervalSet.py | 296 --- runtime/Python2/src/antlr4/LL1Analyzer.py | 170 -- runtime/Python2/src/antlr4/Lexer.py | 319 ---- runtime/Python2/src/antlr4/ListTokenSource.py | 143 -- runtime/Python2/src/antlr4/Parser.py | 563 ------ .../Python2/src/antlr4/ParserInterpreter.py | 163 -- .../Python2/src/antlr4/ParserRuleContext.py | 181 -- .../Python2/src/antlr4/PredictionContext.py | 632 ------- runtime/Python2/src/antlr4/Recognizer.py | 141 -- runtime/Python2/src/antlr4/RuleContext.py | 225 --- runtime/Python2/src/antlr4/StdinStream.py | 21 - runtime/Python2/src/antlr4/Token.py | 159 -- .../Python2/src/antlr4/TokenStreamRewriter.py | 252 --- runtime/Python2/src/antlr4/Utils.py | 39 - runtime/Python2/src/antlr4/__init__.py | 21 - runtime/Python2/src/antlr4/atn/ATN.py | 122 -- runtime/Python2/src/antlr4/atn/ATNConfig.py | 149 -- .../Python2/src/antlr4/atn/ATNConfigSet.py | 210 --- .../antlr4/atn/ATNDeserializationOptions.py | 21 - .../Python2/src/antlr4/atn/ATNDeserializer.py | 446 ----- .../Python2/src/antlr4/atn/ATNSimulator.py | 45 - runtime/Python2/src/antlr4/atn/ATNState.py | 255 --- runtime/Python2/src/antlr4/atn/ATNType.py | 12 - .../src/antlr4/atn/LexerATNSimulator.py | 554 ------ runtime/Python2/src/antlr4/atn/LexerAction.py | 291 --- .../src/antlr4/atn/LexerActionExecutor.py | 135 -- .../src/antlr4/atn/ParserATNSimulator.py | 1659 ----------------- .../Python2/src/antlr4/atn/PredictionMode.py | 495 ----- .../Python2/src/antlr4/atn/SemanticContext.py | 328 ---- runtime/Python2/src/antlr4/atn/Transition.py | 252 --- runtime/Python2/src/antlr4/atn/__init__.py | 1 - runtime/Python2/src/antlr4/dfa/DFA.py | 135 -- .../Python2/src/antlr4/dfa/DFASerializer.py | 74 - runtime/Python2/src/antlr4/dfa/DFAState.py | 124 -- runtime/Python2/src/antlr4/dfa/__init__.py | 1 - .../antlr4/error/DiagnosticErrorListener.py | 106 -- .../Python2/src/antlr4/error/ErrorListener.py | 72 - .../Python2/src/antlr4/error/ErrorStrategy.py | 702 ------- runtime/Python2/src/antlr4/error/Errors.py | 157 -- runtime/Python2/src/antlr4/error/__init__.py | 1 - runtime/Python2/src/antlr4/tree/Chunk.py | 32 - .../Python2/src/antlr4/tree/ParseTreeMatch.py | 120 -- .../src/antlr4/tree/ParseTreePattern.py | 69 - .../antlr4/tree/ParseTreePatternMatcher.py | 367 ---- .../Python2/src/antlr4/tree/RuleTagToken.py | 48 - .../Python2/src/antlr4/tree/TokenTagToken.py | 47 - runtime/Python2/src/antlr4/tree/Tree.py | 186 -- runtime/Python2/src/antlr4/tree/Trees.py | 110 -- runtime/Python2/src/antlr4/tree/__init__.py | 0 runtime/Python2/src/antlr4/xpath/XPath.py | 269 --- .../Python2/src/antlr4/xpath/XPathLexer.g4 | 45 - .../Python2/src/antlr4/xpath/XPathLexer.py | 93 - runtime/Python2/src/antlr4/xpath/__init__.py | 1 - .../Python2/tests/TestTokenStreamRewriter.py | 524 ------ runtime/Python2/tests/__init__.py | 0 runtime/Python2/tests/mocks/TestLexer.py | 101 - runtime/Python2/tests/mocks/__init__.py | 0 runtime/Python2/tests/run.py | 7 - scripts/files-to-update.txt | 3 - scripts/github_release_notes.py | 2 +- .../v4/test/tool/TestUnicodeEscapes.java | 3 - .../templates/codegen/Python2/Python2.stg | 829 -------- .../org/antlr/v4/codegen/UnicodeEscapes.java | 1 - .../v4/codegen/target/Python2Target.java | 92 - 90 files changed, 278 insertions(+), 13777 deletions(-) create mode 100644 doc/target-agnostic-grammars.md delete mode 100644 runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Python2.test.stg delete mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/python2/Python2Runner.java delete mode 100644 runtime-testsuite/test/org/antlr/v4/test/runtime/python2/Python2RuntimeTests.java delete mode 100644 runtime/Python2/MANIFEST.in delete mode 100644 runtime/Python2/README.txt delete mode 100644 runtime/Python2/RELEASE-4.5.txt delete mode 100644 runtime/Python2/setup.py delete mode 100644 runtime/Python2/src/antlr4/BufferedTokenStream.py delete mode 100644 runtime/Python2/src/antlr4/CodePoints.py delete mode 100644 runtime/Python2/src/antlr4/CommonTokenFactory.py delete mode 100644 runtime/Python2/src/antlr4/CommonTokenStream.py delete mode 100644 runtime/Python2/src/antlr4/FileStream.py delete mode 100644 runtime/Python2/src/antlr4/InputStream.py delete mode 100644 runtime/Python2/src/antlr4/IntervalSet.py delete mode 100644 runtime/Python2/src/antlr4/LL1Analyzer.py delete mode 100644 runtime/Python2/src/antlr4/Lexer.py delete mode 100644 runtime/Python2/src/antlr4/ListTokenSource.py delete mode 100644 runtime/Python2/src/antlr4/Parser.py delete mode 100644 runtime/Python2/src/antlr4/ParserInterpreter.py delete mode 100644 runtime/Python2/src/antlr4/ParserRuleContext.py delete mode 100644 runtime/Python2/src/antlr4/PredictionContext.py delete mode 100644 runtime/Python2/src/antlr4/Recognizer.py delete mode 100644 runtime/Python2/src/antlr4/RuleContext.py delete mode 100644 runtime/Python2/src/antlr4/StdinStream.py delete mode 100644 runtime/Python2/src/antlr4/Token.py delete mode 100644 runtime/Python2/src/antlr4/TokenStreamRewriter.py delete mode 100644 runtime/Python2/src/antlr4/Utils.py delete mode 100644 runtime/Python2/src/antlr4/__init__.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATN.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNConfig.py delete mode 100755 runtime/Python2/src/antlr4/atn/ATNConfigSet.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNDeserializer.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNSimulator.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNState.py delete mode 100644 runtime/Python2/src/antlr4/atn/ATNType.py delete mode 100644 runtime/Python2/src/antlr4/atn/LexerATNSimulator.py delete mode 100644 runtime/Python2/src/antlr4/atn/LexerAction.py delete mode 100644 runtime/Python2/src/antlr4/atn/LexerActionExecutor.py delete mode 100755 runtime/Python2/src/antlr4/atn/ParserATNSimulator.py delete mode 100644 runtime/Python2/src/antlr4/atn/PredictionMode.py delete mode 100644 runtime/Python2/src/antlr4/atn/SemanticContext.py delete mode 100644 runtime/Python2/src/antlr4/atn/Transition.py delete mode 100644 runtime/Python2/src/antlr4/atn/__init__.py delete mode 100644 runtime/Python2/src/antlr4/dfa/DFA.py delete mode 100644 runtime/Python2/src/antlr4/dfa/DFASerializer.py delete mode 100644 runtime/Python2/src/antlr4/dfa/DFAState.py delete mode 100644 runtime/Python2/src/antlr4/dfa/__init__.py delete mode 100644 runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py delete mode 100644 runtime/Python2/src/antlr4/error/ErrorListener.py delete mode 100644 runtime/Python2/src/antlr4/error/ErrorStrategy.py delete mode 100644 runtime/Python2/src/antlr4/error/Errors.py delete mode 100644 runtime/Python2/src/antlr4/error/__init__.py delete mode 100644 runtime/Python2/src/antlr4/tree/Chunk.py delete mode 100644 runtime/Python2/src/antlr4/tree/ParseTreeMatch.py delete mode 100644 runtime/Python2/src/antlr4/tree/ParseTreePattern.py delete mode 100644 runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py delete mode 100644 runtime/Python2/src/antlr4/tree/RuleTagToken.py delete mode 100644 runtime/Python2/src/antlr4/tree/TokenTagToken.py delete mode 100644 runtime/Python2/src/antlr4/tree/Tree.py delete mode 100644 runtime/Python2/src/antlr4/tree/Trees.py delete mode 100644 runtime/Python2/src/antlr4/tree/__init__.py delete mode 100644 runtime/Python2/src/antlr4/xpath/XPath.py delete mode 100644 runtime/Python2/src/antlr4/xpath/XPathLexer.g4 delete mode 100644 runtime/Python2/src/antlr4/xpath/XPathLexer.py delete mode 100644 runtime/Python2/src/antlr4/xpath/__init__.py delete mode 100644 runtime/Python2/tests/TestTokenStreamRewriter.py delete mode 100644 runtime/Python2/tests/__init__.py delete mode 100644 runtime/Python2/tests/mocks/TestLexer.py delete mode 100644 runtime/Python2/tests/mocks/__init__.py delete mode 100644 runtime/Python2/tests/run.py delete mode 100644 tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg delete mode 100644 tool/src/org/antlr/v4/codegen/target/Python2Target.java diff --git a/.github/workflows/hosted.yml b/.github/workflows/hosted.yml index 01deeaea07..1511fbf80c 100644 --- a/.github/workflows/hosted.yml +++ b/.github/workflows/hosted.yml @@ -174,7 +174,6 @@ jobs: javascript, typescript, php, - python2, python3, # swift, ] @@ -233,13 +232,6 @@ jobs: if: startswith(matrix.os, 'windows') && (matrix.target == 'cpp') uses: microsoft/setup-msbuild@v1.1 - - name: Set up Python 2 - if: matrix.target == 'python2' - uses: actions/setup-python@v4 - with: - python-version: '2.x' - architecture: 'x64' - - name: Set up Python 3 if: matrix.target == 'python3' uses: actions/setup-python@v4 @@ -316,7 +308,6 @@ jobs: cd runtime-testsuite switch ("${{ matrix.target }}") { - python2 { mvn -X '-Dantlr-python2-exec="${{ env.pythonLocation }}\python.exe"' '-Dtest=python2.**' test } python3 { mvn -X '-Dantlr-python3-exec="${{ env.pythonLocation }}\python.exe"' '-Dtest=python3.**' test } default { mvn -X '-Dtest=${{ matrix.target }}.**' test } } @@ -331,7 +322,6 @@ jobs: cd runtime-testsuite case ${{ matrix.target }} in - python2) mvn -X '-Dantlr-python2-exec=${{ env.pythonLocation }}/bin/python' '-Dtest=python2.**' test ;; python3) mvn -X '-Dantlr-python3-exec=${{ env.pythonLocation }}/bin/python' '-Dtest=python3.**' test ;; *) mvn -X '-Dtest=${{ matrix.target }}.**' test ;; esac diff --git a/README.md b/README.md index c05b3f8c0b..c6675582ed 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ ANTLR project lead and supreme dictator for life * [Official site](http://www.antlr.org/) * [Documentation](https://github.com/antlr/antlr4/blob/master/doc/index.md) * [FAQ](https://github.com/antlr/antlr4/blob/master/doc/faq/index.md) -* [ANTLR code generation targets](https://github.com/antlr/antlr4/blob/master/doc/targets.md)
(Currently: Java, C#, Python2|3, JavaScript, Go, C++, Swift, Dart, PHP) +* [ANTLR code generation targets](https://github.com/antlr/antlr4/blob/master/doc/targets.md)
(Currently: Java, C#, Python3, JavaScript, Go, C++, Swift, Dart, PHP) * [Java API](http://www.antlr.org/api/Java/index.html) * [ANTLR v3](http://www.antlr3.org/) * [v3 to v4 Migration, differences](https://github.com/antlr/antlr4/blob/master/doc/faq/general.md) diff --git a/doc/IDEs.md b/doc/IDEs.md index 7c21e8af01..9e98503d3c 100644 --- a/doc/IDEs.md +++ b/doc/IDEs.md @@ -1,5 +1,5 @@ # Integrating ANTLR into Development Systems -The Java target is the reference implementation mirrored by other targets. The following pages help you integrate ANTLR into development environments and build systems appropriate for your target language. As of December 2016, we have Java, C#, Python 2, Python 3, JavaScript, Go, C++, and Swift targets. +The Java target is the reference implementation mirrored by other targets. The following pages help you integrate ANTLR into development environments and build systems appropriate for your target language. As of December 2016, we have Java, C#, Python 3, JavaScript, Go, C++, and Swift targets. The easiest thing is probably just to use an [ANTLR plug-in](http://www.antlr.org/tools.html) for your favorite development environment. diff --git a/doc/antlr-project-testing.md b/doc/antlr-project-testing.md index fc93c7c9a4..8968a4bdfb 100644 --- a/doc/antlr-project-testing.md +++ b/doc/antlr-project-testing.md @@ -56,7 +56,6 @@ In order to perform the tests on all target languages, the following tools shoul * dotnet * Node.js -* Python 2 * Python 3 * Go * Swift diff --git a/doc/getting-started.md b/doc/getting-started.md index 1a40594a3b..474c98ff23 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -44,7 +44,7 @@ On Windows, the `pip` command doesn't just work---you need to add the `...\local 1. Go to the Microsoft Store 2. Search in Microsoft Store for Python -3. Select the newest version of Python (3.10). +3. Select the newest version of Python (3.11). 4. Click the "Get" button. Store installs python and pip at "c:\Users...\AppData\Local\Microsoft\WindowsApps\python.exe" and "c:\Users...\AppData\Local\Microsoft\WindowsApps\pip.exe", respectively. And, it updates the search path immediately with the install. 5. Open a "cmd" terminal. 6. You can now type "python" and "pip", and "pip install antlr4-tools". 7. Unfortunately, it does not add that to the search path. diff --git a/doc/python-target.md b/doc/python-target.md index 6f7f6a84a9..999d78d512 100644 --- a/doc/python-target.md +++ b/doc/python-target.md @@ -1,19 +1,32 @@ -# Python (2 and 3) +# Python 3 -The examples from the ANTLR 4 book converted to Python are [here](https://github.com/jszheng/py3antlr4book). +## Requirements -There are 2 Python targets: `Python2` and `Python3`. This is because there is only limited compatibility between those 2 versions of the language. Please refer to the [Python documentation](https://wiki.python.org/moin/Python2orPython3) for full details. +<<<<<<< HEAD +You will need to install the minimum requirements for Python and Pip. +See https://www.python.org/downloads/, version 3.6 or better, +and https://www.geeksforgeeks.org/how-to-install-pip-on-windows/. +======= +There is ONE Python target: `Python3`. This is because there is only limited compatibility between those 2 versions of the language. Please refer to the [Python documentation](https://wiki.python.org/moin/Python3) for full details. +>>>>>>> 004792243 (Removing Python2 from additional places.) -How to create a Python lexer or parser? -This is pretty much the same as creating a Java lexer or parser, except you need to specify the language target, for example: +## A minimal example +An example of a parser with the Python3 target consists of the following files. +* An Antlr4 grammar, e.g., Expr.g4: ``` -$ antlr4 -Dlanguage=Python2 MyGrammar.g4 -``` - -or - +<<<<<<< HEAD +grammar Expr; +start_ : expr (';' expr)* EOF; +expr : atom | ('+' | '-') expr | expr '**' expr | expr ('*' | '/') expr | expr ('+' | '-') expr | '(' expr ')' | atom ; +atom : INT ; +INT : [0-9]+ ; +WS : [ \t\n\r]+ -> skip ; ``` +* Driver.py: +The driver code consists at a minimum code to open a file, create a lexer, token stream, +and parser, then call the parser. +======= $ antlr4 -Dlanguage=Python3 MyGrammar.g4 ``` @@ -21,9 +34,8 @@ For a full list of antlr4 tool options, please visit the tool documentation page ## Where can I get the runtime? -Once you've generated the lexer and/or parser code, you need to download the runtime. The Python runtimes are available from PyPI: +Once you've generated the lexer and/or parser code, you need to download the runtime. The Python runtime are available from PyPI: -* https://pypi.python.org/pypi/antlr4-python2-runtime/ * https://pypi.python.org/pypi/antlr4-python3-runtime/ The runtimes are provided in the form of source code, so no additional installation is required. @@ -43,112 +55,232 @@ Let's suppose that your grammar is named, as above, "MyGrammar". Let's suppose t Now a fully functioning script might look like the following: +>>>>>>> 004792243 (Removing Python2 from additional places.) ```python import sys from antlr4 import * -from MyGrammarLexer import MyGrammarLexer -from MyGrammarParser import MyGrammarParser - +from ExprLexer import ExprLexer +from ExprParser import ExprParser +from VisitorInterp import VisitorInterp + def main(argv): input_stream = FileStream(argv[1]) - lexer = MyGrammarLexer(input_stream) + lexer = ExprLexer(input_stream) stream = CommonTokenStream(lexer) - parser = MyGrammarParser(stream) - tree = parser.startRule() + parser = ExprParser(stream) + tree = parser.start_() if __name__ == '__main__': main(sys.argv) ``` - -This program will work. But it won't be useful unless you do one of the following: - -* you visit the parse tree using a custom listener -* you visit the parse tree using a custom visitor -* your grammar comprises production code (like ANTLR3) - -(please note that production code is target specific, so you can't have multi target grammars that include production code, except for very limited use cases, see below) - -## How do I create and run a custom listener? - -Let's suppose your MyGrammar grammar comprises 2 rules: "key" and "value". The antlr4 tool will have generated the following listener: - -```python -class MyGrammarListener(ParseTreeListener): - def enterKey(self, ctx): - pass - def exitKey(self, ctx): - pass - def enterValue(self, ctx): - pass - def exitValue(self, ctx): - pass +* requirements.txt: +This file contains a list of the +required packages for the program. Required +packages are downloaded by `pip`. The file +must include a reference to the Antlr Python3 runtime. ``` - -In order to provide custom behavior, you might want to create the following class: - -```python -class KeyPrinter(MyGrammarListener): - def exitKey(self, ctx): - print("Oh, a key!") +antlr4-python3-runtime==4.13.0 ``` - -In order to execute this listener, you would simply add the following lines to the above code: - +* A build script, e.g., build.sh: +You should provide a script that builds the program. ``` - ... - tree = parser.startRule() - only repeated here for reference - printer = KeyPrinter() - walker = ParseTreeWalker() - walker.walk(printer, tree) +pip install -r requirements.txt +antlr4 -v 4.13.0 -Dlanguage=Python3 Expr.g4 ``` - -Further information can be found from the ANTLR 4 definitive guide. +_It is vital that the versions for the +Antlr tool used to generate the parser +and the Antlr Python3 runtime match. +E.g., 4.13.0. Using build files will help +eliminate common errors that happen._ + +_For a list of antlr4 tool options, please visit the [ANTLR Tool Command Line Options](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md) documentation._ +* Input, e.g., input.txt: +``` +-(1 + 2)/3; +1; +2+3; +8*9 +``` +* Run script +You should provide a script to run your program. +``` +python Driver.py input.txt +``` + +## Visitors -The Python implementation of ANTLR is as close as possible to the Java one, so you shouldn't find it difficult to adapt the examples for Python. +Antlr listeners and visitors are implementations that traverse a parse tree in unique ways. But both are used to implement +[static](https://en.wikipedia.org/wiki/Static_program_analysis) or [dynamic](https://en.wikipedia.org/wiki/Dynamic_program_analysis) +program analysis. It is essential to understand when to choose a listener versus a visitor. +For further information, see https://tomassetti.me/listeners-and-visitors/. +A visitor is the best choice when computing a synthesized attribute. +Alternatively, a listener is the best choice when computing both synthesized +and inherited attributes. -## Target agnostic grammars +To implement a visitor, add the `-visitor` option to the `antlr4` command. +Add a class that inherits from the generated visitor +with code that implements the analysis. -If your grammar is targeted to Python only, you may ignore the following. But if your goal is to get your Java parser to also run in Python, then you might find it useful. +For example, the following code implements an expression evaluator for the Expr.g4 grammar using a visitor. -1. Do not embed production code inside your grammar. This is not portable and will not be. Move all your code to listeners or visitors. -1. The only production code absolutely required to sit with the grammar should be semantic predicates, like: +* Driver.py: +```python +import sys +from antlr4 import * +from ExprLexer import ExprLexer +from ExprParser import ExprParser +from VisitorInterp import VisitorInterp + +def main(argv): + input_stream = FileStream(argv[1]) + lexer = ExprLexer(input_stream) + stream = CommonTokenStream(lexer) + parser = ExprParser(stream) + tree = parser.start_() + if parser.getNumberOfSyntaxErrors() > 0: + print("syntax errors") + else: + vinterp = VisitorInterp() + vinterp.visit(tree) + +if __name__ == '__main__': + main(sys.argv) ``` -ID {$text.equals("test")}? +* VisitorInterp.py: +```python +import sys +from antlr4 import * +from ExprParser import ExprParser +from ExprVisitor import ExprVisitor + +class VisitorInterp(ExprVisitor): + def visitAtom(self, ctx:ExprParser.AtomContext): + return int(ctx.getText()) + + def visitExpr(self, ctx:ExprParser.ExprContext): + if ctx.getChildCount() == 3: + if ctx.getChild(0).getText() == "(": + return self.visit(ctx.getChild(1)) + op = ctx.getChild(1).getText() + v1 = self.visit(ctx.getChild(0)) + v2 = self.visit(ctx.getChild(2)) + if op == "+": + return v1 + v2 + if op == "-": + return v1 - v2 + if op == "*": + return v1 * v2 + if op == "/": + return v1 / v2 + return 0 + if ctx.getChildCount() == 2: + opc = ctx.getChild(0).getText() + if opc == "+": + return self.visit(ctx.getChild(1)) + if opc == "-": + return - self.visit(ctx.getChild(1)) + return 0 + if ctx.getChildCount() == 1: + return self.visit(ctx.getChild(0)) + return 0 + + def visitStart_(self, ctx:ExprParser.Start_Context): + for i in range(0, ctx.getChildCount(), 2): + print(self.visit(ctx.getChild(i))) + return 0 ``` -Unfortunately, this is not portable, as Java and Python (and other target languages) have different syntaxes for all but the simplest language elements. But you can work around it. The trick involves: +## Listeners + +Antlr listeners are an alternative to implement program analysis. It differs from a visitor in that +there are `enter` and `exit` methods called during the LR tranversal. You can use this tree walker +to implement both [inherited](https://en.wikipedia.org/wiki/Attribute_grammar#Inherited_attributes) +and [synthesized attribute](https://en.wikipedia.org/wiki/Attribute_grammar#Synthesized_attributes) +analysis. -* deriving your parser from a parser you provide, such as BaseParser -* implementing utility methods, such as "isEqualText", in this BaseParser, in different files for each target language -* invoking your utility methods in the semantic predicate from the `$parser` object +To implement a listener, add the `-listener` option to the `antlr4` command. +Add a class that inherits from the generated listener +with code that implements the analysis. -Thanks to the above, you should be able to rewrite the above semantic predicate as follows: +The following example implements an expression evaluator using a listener. -File `MyGrammarParser.g4`: -``` -options { superClass = MyGrammarBaseParser; } -... -ID {$parser.isEqualText($text,"test")}? -``` - -File `MyGrammarBaseParser.py`: +* Driver.py: ```python +import sys from antlr4 import * +from ExprLexer import ExprLexer +from ExprParser import ExprParser +from ListenerInterp import ListenerInterp -class MyGrammarBaseParser(Parser): - - def isEqualText(a, b): - return a is b +def main(argv): + input_stream = FileStream(argv[1]) + lexer = ExprLexer(input_stream) + stream = CommonTokenStream(lexer) + parser = ExprParser(stream) + tree = parser.start_() + if parser.getNumberOfSyntaxErrors() > 0: + print("syntax errors") + else: + linterp = ListenerInterp() + walker = ParseTreeWalker() + walker.walk(linterp, tree) + +if __name__ == '__main__': + main(sys.argv) +``` +* ListenerInterp.py: +```python +import sys +from antlr4 import * +from ExprParser import ExprParser +from ExprListener import ExprListener + +class ListenerInterp(ExprListener): + def __init__(self): + self.result = {} + + def exitAtom(self, ctx:ExprParser.AtomContext): + self.result[ctx] = int(ctx.getText()) + + def exitExpr(self, ctx:ExprParser.ExprContext): + if ctx.getChildCount() == 3: + if ctx.getChild(0).getText() == "(": + self.result[ctx] = self.result[ctx.getChild(1)] + else: + opc = ctx.getChild(1).getText() + v1 = self.result[ctx.getChild(0)] + v2 = self.result[ctx.getChild(2)] + if opc == "+": + self.result[ctx] = v1 + v2 + elif opc == "-": + self.result[ctx] = v1 - v2 + elif opc == "*": + self.result[ctx] = v1 * v2 + elif opc == "/": + self.result[ctx] = v1 / v2 + else: + ctx.result[ctx] = 0 + elif ctx.getChildCount() == 2: + opc = ctx.getChild(0).getText() + if opc == "+": + v = self.result[ctx.getChild(1)] + self.result[ctx] = v + elif opc == "-": + v = self.result[ctx.getChild(1)] + self.result[ctx] = - v + elif ctx.getChildCount() == 1: + self.result[ctx] = self.result[ctx.getChild(0)] + + def exitStart_(self, ctx:ExprParser.Start_Context): + for i in range(0, ctx.getChildCount(), 2): + print(self.result[ctx.getChild(i)]) ``` -File `MyGrammarBaseParser.java`: -```java -import org.antlr.v4.runtime.*; +Further information can be found from the ANTLR 4 definitive guide. -public abstract class MyGrammarBaseParser extends Parser { +## Examples - public static boolean isEqualText(a, b) { - return a.equals(b); - } -} -``` +The examples from the ANTLR 4 book converted to Python are [here](https://github.com/jszheng/py3antlr4book). + +There are many examples of grammars that target the Python3 target in the +[grammars-v4 Github repository](https://github.com/antlr/grammars-v4). diff --git a/doc/releasing-antlr.md b/doc/releasing-antlr.md index fde3cbe5f9..80fc4e36cd 100644 --- a/doc/releasing-antlr.md +++ b/doc/releasing-antlr.md @@ -103,9 +103,6 @@ java -cp ":/Users/parrt/.m2/repository/org/antlr/antlr4/4.13.0-SNAPSHOT/antlr4-4 cd ~/antlr/code/antlr4/runtime/CSharp/src/Tree/Xpath java -cp ":/Users/parrt/.m2/repository/org/antlr/antlr4/4.13-0-SNAPSHOT/antlr4-4.13-0-SNAPSHOT-complete.jar:$CLASSPATH" org.antlr.v4.Tool -Dlanguage=CSharp XPathLexer.g4 -cd ~/antlr/code/antlr4/runtime/Python2/src/antlr4/xpath -java -cp ":/Users/parrt/.m2/repository/org/antlr/antlr4/4.13-0-SNAPSHOT/antlr4-4.13-0-SNAPSHOT-complete.jar:$CLASSPATH" org.antlr.v4.Tool -Dlanguage=Python2 XPathLexer.g4 - cd ~/antlr/code/antlr4/runtime/Python3/tests/expr java -cp ":/Users/parrt/.m2/repository/org/antlr/antlr4/4.13-0-SNAPSHOT/antlr4-4.13-0-SNAPSHOT-complete.jar:$CLASSPATH" org.antlr.v4.Tool -Dlanguage=Python3 Expr.g4 cd ~/antlr/code/antlr4/runtime/Python3/src/antlr4/xpath @@ -317,7 +314,7 @@ nuget push Antlr4.Runtime.Standard..nupkg -Source https://ww ### Python -The Python targets get deployed with `twine` for Python 2 and 3. +The Python target gets deployed with `twine` for Python 3. First, set up `~/.pypirc` with tight privileges: @@ -343,16 +340,6 @@ password: xxx Then run the python build and upload: -```bash -cd ~/antlr/code/antlr4/runtime/Python2 -# assume you have ~/.pypirc set up -pip install build twine -python -m build -twine upload dist/antlr4-python2-runtime-4.13.0.tar.gz -``` - -For Python 3 target, do - ```bash cd ~/antlr/code/antlr4/runtime/Python3 python -m build diff --git a/doc/target-agnostic-grammars.md b/doc/target-agnostic-grammars.md new file mode 100644 index 0000000000..00e9eedc75 --- /dev/null +++ b/doc/target-agnostic-grammars.md @@ -0,0 +1,46 @@ +# Target agnostic grammars + +If your grammar is targeted to Python only, you may ignore the following. But if your goal is to get your Java parser to also run in Python, then you might find it useful. + +1. Do not embed production code inside your grammar. This is not portable and will not be. Move all your code to listeners or visitors. +1. The only production code absolutely required to sit with the grammar should be semantic predicates, like: +``` +ID {$text.equals("test")}? +``` + +Unfortunately, this is not portable, as Java and Python (and other target languages) have different syntaxes for all but the simplest language elements. But you can work around it. The trick involves: + +* deriving your parser from a parser you provide, such as BaseParser +* implementing utility methods, such as "isEqualText", in this BaseParser, in different files for each target language +* invoking your utility methods in the semantic predicate from the `$parser` object + +Thanks to the above, you should be able to rewrite the above semantic predicate as follows: + +File `MyGrammarParser.g4`: +``` +options { superClass = MyGrammarBaseParser; } +... +ID {$parser.isEqualText($text,"test")}? +``` + +File `MyGrammarBaseParser.py`: +```python +from antlr4 import * + +class MyGrammarBaseParser(Parser): + + def isEqualText(a, b): + return a is b +``` + +File `MyGrammarBaseParser.java`: +```java +import org.antlr.v4.runtime.*; + +public abstract class MyGrammarBaseParser extends Parser { + + public static boolean isEqualText(a, b) { + return a.equals(b); + } +} +``` diff --git a/doc/targets.md b/doc/targets.md index 74dae819c0..296caf6f74 100644 --- a/doc/targets.md +++ b/doc/targets.md @@ -4,7 +4,7 @@ This page lists the available and upcoming ANTLR runtimes. Please note that you * [Java](java-target.md). The [ANTLR v4 book](http://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference) has a decent summary of the runtime library. We have added a useful XPath feature since the book was printed that lets you select bits of parse trees. See [Runtime API](http://www.antlr.org/api/Java/index.html) and [Getting Started with ANTLR v4](getting-started.md) * [C#](csharp-target.md) -* [Python](python-target.md) (2 and 3) +* [Python](python-target.md) (3) * [JavaScript](javascript-target.md) * [TypeScript](typescript-target.md) * [Go](go-target.md) @@ -17,7 +17,7 @@ This page lists the available and upcoming ANTLR runtimes. Please note that you New features generally appear in the Java target and then migrate to the other targets, but these other targets don't always get updated in the same overall tool release. This section tries to identify features added to Java that have not been added to the other targets. -|Feature|Java|C♯|Python2|Python3|JavaScript|Go|C++|Swift|PHP|Dart -|---|---|---|---|---|---|---|---|---|---|---| -|Ambiguous tree construction|4.5.1|-|-|-|-|-|-|-|-|-| +|Feature|Java|C♯|Python3|JavaScript|Go|C++|Swift|PHP|Dart +|---|---|---|---|---|---|---|---|---|---| +|Ambiguous tree construction|4.5.1|-|-|-|-|-|-|-|-| diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserErrors/ExtraneousInput.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserErrors/ExtraneousInput.txt index 4b0f7232a9..3d76cf6d5e 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserErrors/ExtraneousInput.txt +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserErrors/ExtraneousInput.txt @@ -28,5 +28,4 @@ Go JavaScript TypeScript PHP -Python2 Python3 diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/Performance/DropLoopEntryBranchInLRRule_4.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/Performance/DropLoopEntryBranchInLRRule_4.txt index 8bf8b21bc0..a6f4b3d966 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/Performance/DropLoopEntryBranchInLRRule_4.txt +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/Performance/DropLoopEntryBranchInLRRule_4.txt @@ -51,7 +51,6 @@ between X1 and X2 or between X3 and X4 ; [skip] -Python2 Python3 JavaScript TypeScript diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt index 740ed05198..ca2fa6b9a1 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt @@ -41,5 +41,4 @@ Go JavaScript TypeScript PHP -Python2 Python3 diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Python2.test.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Python2.test.stg deleted file mode 100644 index d13e88507e..0000000000 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Python2.test.stg +++ /dev/null @@ -1,286 +0,0 @@ -writeln(s) ::= <, file=self._output)>> -write(s) ::= <,end='', file=self._output)>> -writeList(s) ::= <)}; separator="+">, file=self._output)>> - -False() ::= "False" - -True() ::= "True" - -Not(v) ::= "not " - -Assert(s) ::= "" - -Cast(t,v) ::= "" - -Append(a,b) ::= " + str()" - -AppendStr(a,b) ::= " + " - -Concat(a,b) ::= "" - -AssertIsList(v) ::= "assert isinstance(, (list, tuple))" - -AssignLocal(s,v) ::= " = " - -InitIntMember(n,v) ::= <% = %> - -InitBooleanMember(n,v) ::= <% = %> - -InitIntVar(n,v) ::= <%%> - -IntArg(n) ::= "" - -VarRef(n) ::= "" - -GetMember(n) ::= <%self.%> - -SetMember(n,v) ::= <%self. = %> - -AddMember(n,v) ::= <%self. += %> - -MemberEquals(n,v) ::= <%self. == %> - -ModMemberEquals(n,m,v) ::= <%self. % == %> - -ModMemberNotEquals(n,m,v) ::= <%self. % != %> - -DumpDFA() ::= "self.dumpDFA()" - -Pass() ::= "pass" - -StringList() ::= "" - -BuildParseTrees() ::= "self._buildParseTrees = True" - -BailErrorStrategy() ::= <%self._errHandler = BailErrorStrategy()%> - -ToStringTree(s) ::= <%.toStringTree(recog=self)%> - -Column() ::= "self.column" - -Text() ::= "self.text" - -ValEquals(a,b) ::= <%==%> - -TextEquals(a) ::= <%self.text==""%> - -PlusText(a) ::= <%"" + self.text%> - -InputText() ::= "self._input.getText()" - -LTEquals(i, v) ::= <%self._input.LT().text==%> - -LANotEquals(i, v) ::= <%self._input.LA()!=%> - -TokenStartColumnEquals(i) ::= <%self._tokenStartColumn==%> - -ImportListener(X) ::= "" - -GetExpectedTokenNames() ::= "self.getExpectedTokens().toString(self.literalNames, self.symbolicNames)" - -ImportRuleInvocationStack() ::= "" - -RuleInvocationStack() ::= "str_list(self.getRuleInvocationStack())" - -LL_EXACT_AMBIG_DETECTION() ::= <> - -ParserToken(parser, token) ::= <%.%> - -Production(p) ::= <%

%> - -Result(r) ::= <%%> - -ParserPropertyMember() ::= << -@members { -def Property(self): - return True - -} ->> - -ParserPropertyCall(p, call) ::= "

." - -PositionAdjustingLexerDef() ::= "" - -PositionAdjustingLexer() ::= << - -def resetAcceptPosition(self, index, line, column): - self._input.seek(index) - self.line = line - self.column = column - self._interp.consume(self._input) - -def nextToken(self): - if self._interp.__dict__.get("resetAcceptPosition", None) is None: - self._interp.__dict__["resetAcceptPosition"] = self.resetAcceptPosition - return super(type(self),self).nextToken() - -def emit(self): - if self._type==PositionAdjustingLexer.TOKENS: - self.handleAcceptPositionForKeyword("tokens") - elif self._type==PositionAdjustingLexer.LABEL: - self.handleAcceptPositionForIdentifier() - return super(type(self),self).emit() - -def handleAcceptPositionForIdentifier(self): - tokenText = self.text - identifierLength = 0 - while identifierLength \< len(tokenText) and self.isIdentifierChar(tokenText[identifierLength]): - identifierLength += 1 - - if self._input.index > self._tokenStartCharIndex + identifierLength: - offset = identifierLength - 1 - self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset, - self._tokenStartLine, self._tokenStartColumn + offset) - return True - else: - return False - - -def handleAcceptPositionForKeyword(self, keyword): - if self._input.index > self._tokenStartCharIndex + len(keyword): - offset = len(keyword) - 1 - self._interp.resetAcceptPosition(self._tokenStartCharIndex + offset, - self._tokenStartLine, self._tokenStartColumn + offset) - return True - else: - return False - -@staticmethod -def isIdentifierChar(c): - return c.isalnum() or c == '_' - ->> - -BasicListener(X) ::= << -@parser::members { -if __name__ is not None and "." in __name__: - from .Listener import Listener -else: - from Listener import Listener - -class LeafListener(TListener): - def __init__(self, output): - self._output = output - def visitTerminal(self, node): - print(node.symbol.text, file=self._output) -} ->> - -WalkListener(s) ::= << -walker = ParseTreeWalker() -walker.walk(TParser.LeafListener(self._output), ) ->> - -TreeNodeWithAltNumField(X) ::= << -@parser::members { -class MyRuleNode(ParserRuleContext): - def __init__(self, parent = None, invokingStateNumber = None ): - super(Parser.MyRuleNode, self).__init__(parent, invokingStateNumber) - self.altNum = 0; - def getAltNumber(self): - return self.altNum - def setAltNumber(self, altNum): - self.altNum = altNum -} ->> - -TokenGetterListener(X) ::= << -@parser::members { -if __name__ is not None and "." in __name__: - from .Listener import Listener -else: - from Listener import Listener - -class LeafListener(TListener): - def __init__(self, output): - self._output = output - def exitA(self, ctx): - if ctx.getChildCount()==2: - print(ctx.INT(0).symbol.text + ' ' + ctx.INT(1).symbol.text + ' ' + str_list(ctx.INT()), file=self._output) - else: - print(str(ctx.ID().symbol), file=self._output) -} ->> - -RuleGetterListener(X) ::= << -@parser::members { -if __name__ is not None and "." in __name__: - from .Listener import Listener -else: - from Listener import Listener - -class LeafListener(TListener): - def __init__(self, output): - self._output = output - def exitA(self, ctx): - if ctx.getChildCount()==2: - print(ctx.b(0).start.text + ' ' + ctx.b(1).start.text + ' ' + ctx.b()[0].start.text, file=self._output) - else: - print(ctx.b(0).start.text, file=self._output) -} ->> - - -LRListener(X) ::= << -@parser::members { -if __name__ is not None and "." in __name__: - from .Listener import Listener -else: - from Listener import Listener - -class LeafListener(TListener): - def __init__(self, output): - self._output = output - def exitE(self, ctx): - if ctx.getChildCount()==3: - print(ctx.e(0).start.text + ' ' + ctx.e(1).start.text + ' ' + ctx.e()[0].start.text, file=self._output) - else: - print(ctx.INT().symbol.text, file=self._output) -} ->> - -LRWithLabelsListener(X) ::= << -@parser::members { -if __name__ is not None and "." in __name__: - from .Listener import Listener -else: - from Listener import Listener - -class LeafListener(TListener): - def __init__(self, output): - self._output = output - def exitCall(self, ctx): - print(ctx.e().start.text + ' ' + str(ctx.eList()), file=self._output) - def exitInt(self, ctx): - print(ctx.INT().symbol.text, file=self._output) -} ->> - -DeclareContextListGettersFunction() ::= << -def foo(): - s = SContext() - a = s.a() - b = s.b() ->> - -Declare_foo() ::= <> - -Invoke_foo() ::= "self.foo()" - -Declare_pred() ::= < -# This token stream ignores the value of {@link Token#getChannel}. If your -# parser requires the token stream filter tokens to only those on a particular -# channel, such as {@link Token#DEFAULT_CHANNEL} or -# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a -# {@link CommonTokenStream}.

-from io import StringIO - -from antlr4.Token import Token -from antlr4.error.Errors import IllegalStateException - -# this is just to keep meaningful parameter types to Parser -class TokenStream(object): - - pass - - -class BufferedTokenStream(TokenStream): - - def __init__(self, tokenSource): - # The {@link TokenSource} from which tokens for this stream are fetched. - self.tokenSource = tokenSource - - # A collection of all tokens fetched from the token source. The list is - # considered a complete view of the input once {@link #fetchedEOF} is set - # to {@code true}. - self.tokens = [] - - # The index into {@link #tokens} of the current token (next token to - # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be - # {@link #LT LT(1)}. - # - #

This field is set to -1 when the stream is first constructed or when - # {@link #setTokenSource} is called, indicating that the first token has - # not yet been fetched from the token source. For additional information, - # see the documentation of {@link IntStream} for a description of - # Initializing Methods.

- self.index = -1 - - # Indicates whether the {@link Token#EOF} token has been fetched from - # {@link #tokenSource} and added to {@link #tokens}. This field improves - # performance for the following cases: - # - #
    - #
  • {@link #consume}: The lookahead check in {@link #consume} to prevent - # consuming the EOF symbol is optimized by checking the values of - # {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.
  • - #
  • {@link #fetch}: The check to prevent adding multiple EOF symbols into - # {@link #tokens} is trivial with this field.
  • - #
      - self.fetchedEOF = False - - def mark(self): - return 0 - - def release(self, marker): - # no resources to release - pass - - def reset(self): - self.seek(0) - - def seek(self, index): - self.lazyInit() - self.index = self.adjustSeekIndex(index) - - def get(self, index): - self.lazyInit() - return self.tokens[index] - - def consume(self): - skipEofCheck = False - if self.index >= 0: - if self.fetchedEOF: - # the last token in tokens is EOF. skip check if p indexes any - # fetched token except the last. - skipEofCheck = self.index < len(self.tokens) - 1 - else: - # no EOF token in tokens. skip check if p indexes a fetched token. - skipEofCheck = self.index < len(self.tokens) - else: - # not yet initialized - skipEofCheck = False - - if not skipEofCheck and self.LA(1) == Token.EOF: - raise IllegalStateException("cannot consume EOF") - - if self.sync(self.index + 1): - self.index = self.adjustSeekIndex(self.index + 1) - - # Make sure index {@code i} in tokens has a token. - # - # @return {@code true} if a token is located at index {@code i}, otherwise - # {@code false}. - # @see #get(int i) - #/ - def sync(self, i): - n = i - len(self.tokens) + 1 # how many more elements we need? - if n > 0 : - fetched = self.fetch(n) - return fetched >= n - return True - - # Add {@code n} elements to buffer. - # - # @return The actual number of elements added to the buffer. - #/ - def fetch(self, n): - if self.fetchedEOF: - return 0 - for i in range(0, n): - t = self.tokenSource.nextToken() - t.tokenIndex = len(self.tokens) - self.tokens.append(t) - if t.type==Token.EOF: - self.fetchedEOF = True - return i + 1 - return n - - - # Get all tokens from start..stop inclusively#/ - def getTokens(self, start, stop, types=None): - if start<0 or stop<0: - return None - self.lazyInit() - subset = [] - if stop >= len(self.tokens): - stop = len(self.tokens)-1 - for i in range(start, stop): - t = self.tokens[i] - if t.type==Token.EOF: - break - if types is None or t.type in types: - subset.append(t) - return subset - - def LA(self, i): - return self.LT(i).type - - def LB(self, k): - if (self.index-k) < 0: - return None - return self.tokens[self.index-k] - - def LT(self, k): - self.lazyInit() - if k==0: - return None - if k < 0: - return self.LB(-k) - i = self.index + k - 1 - self.sync(i) - if i >= len(self.tokens): # return EOF token - # EOF must be last token - return self.tokens[len(self.tokens)-1] - return self.tokens[i] - - # Allowed derived classes to modify the behavior of operations which change - # the current stream position by adjusting the target token index of a seek - # operation. The default implementation simply returns {@code i}. If an - # exception is thrown in this method, the current stream index should not be - # changed. - # - #

      For example, {@link CommonTokenStream} overrides this method to ensure that - # the seek target is always an on-channel token.

      - # - # @param i The target token index. - # @return The adjusted target token index. - - def adjustSeekIndex(self, i): - return i - - def lazyInit(self): - if self.index == -1: - self.setup() - - def setup(self): - self.sync(0) - self.index = self.adjustSeekIndex(0) - - # Reset this token stream by setting its token source.#/ - def setTokenSource(self, tokenSource): - self.tokenSource = tokenSource - self.tokens = [] - self.index = -1 - self.fetchedEOF = False - - - # Given a starting index, return the index of the next token on channel. - # Return i if tokens[i] is on channel. Return the index of the EOF toekn - # if there are no tokens on channel between i and EOF. - #/ - def nextTokenOnChannel(self, i, channel): - self.sync(i) - if i>=len(self.tokens): - return len(self.tokens) - 1 - token = self.tokens[i] - while token.channel!=channel: - if token.type==Token.EOF: - return i - i += 1 - self.sync(i) - token = self.tokens[i] - return i - - # Given a starting index, return the index of the previous token on channel. - # Return i if tokens[i] is on channel. Return -1 if there are no tokens - # on channel between i and 0. - def previousTokenOnChannel(self, i, channel): - while i>=0 and self.tokens[i].channel!=channel: - i -= 1 - return i - - # Collect all tokens on specified channel to the right of - # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or - # EOF. If channel is -1, find any non default channel token. - def getHiddenTokensToRight(self, tokenIndex, channel=-1): - self.lazyInit() - if tokenIndex<0 or tokenIndex>=len(self.tokens): - raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1)) - from antlr4.Lexer import Lexer - nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL) - from_ = tokenIndex+1 - # if none onchannel to right, nextOnChannel=-1 so set to = last token - to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel - return self.filterForChannel(from_, to, channel) - - - # Collect all tokens on specified channel to the left of - # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. - # If channel is -1, find any non default channel token. - def getHiddenTokensToLeft(self, tokenIndex, channel=-1): - self.lazyInit() - if tokenIndex<0 or tokenIndex>=len(self.tokens): - raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1)) - from antlr4.Lexer import Lexer - prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL) - if prevOnChannel == tokenIndex - 1: - return None - # if none on channel to left, prevOnChannel=-1 then from=0 - from_ = prevOnChannel+1 - to = tokenIndex-1 - return self.filterForChannel(from_, to, channel) - - - def filterForChannel(self, left, right, channel): - hidden = [] - for i in range(left, right+1): - t = self.tokens[i] - if channel==-1: - from antlr4.Lexer import Lexer - if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL: - hidden.append(t) - elif t.channel==channel: - hidden.append(t) - if len(hidden)==0: - return None - return hidden - - def getSourceName(self): - return self.tokenSource.getSourceName() - - def getText(self, start=None, stop=None): - """ - Get the text of all tokens in this buffer. - :return: string - """ - self.lazyInit() - self.fill() - if isinstance(start, Token): - start = start.tokenIndex - elif start is None: - start = 0 - if isinstance(stop, Token): - stop = stop.tokenIndex - elif stop is None or stop >= len(self.tokens): - stop = len(self.tokens) - 1 - if start < 0 or stop < 0 or stop> 10) + 0xD800 - low_surrogate = (base & 0x3FF) + 0xDC00 - yield unichr(high_surrogate) - yield unichr(low_surrogate) - -def _to_chars(code_points): - return (unichr(cp) for cp in code_points) - -if sys.maxunicode == 0xFFFF: - from_unicode = _from_utf16 - to_chars = _to_utf16 -else: - assert sys.maxunicode == 0x10FFFF - from_unicode = _from_unicode - to_chars = _to_chars - -def to_unicode(code_points): - return u''.join(to_chars(code_points)) diff --git a/runtime/Python2/src/antlr4/CommonTokenFactory.py b/runtime/Python2/src/antlr4/CommonTokenFactory.py deleted file mode 100644 index 4cc64f19f7..0000000000 --- a/runtime/Python2/src/antlr4/CommonTokenFactory.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# This default implementation of {@link TokenFactory} creates -# {@link CommonToken} objects. -# -from antlr4.Token import CommonToken - -class TokenFactory(object): - - pass - -class CommonTokenFactory(TokenFactory): - # - # The default {@link CommonTokenFactory} instance. - # - #

      - # This token factory does not explicitly copy token text when constructing - # tokens.

      - # - DEFAULT = None - - def __init__(self, copyText=False): - # Indicates whether {@link CommonToken#setText} should be called after - # constructing tokens to explicitly set the text. This is useful for cases - # where the input stream might not be able to provide arbitrary substrings - # of text from the input after the lexer creates a token (e.g. the - # implementation of {@link CharStream#getText} in - # {@link UnbufferedCharStream} throws an - # {@link UnsupportedOperationException}). Explicitly setting the token text - # allows {@link Token#getText} to be called at any time regardless of the - # input stream implementation. - # - #

      - # The default value is {@code false} to avoid the performance and memory - # overhead of copying text for every token unless explicitly requested.

      - # - self.copyText = copyText - - def create(self, source, type, text, channel, start, stop, line, column): - t = CommonToken(source, type, channel, start, stop) - t.line = line - t.column = column - if text is not None: - t.text = text - elif self.copyText and source[1] is not None: - t.text = source[1].getText(start,stop) - return t - - def createThin(self, type, text): - t = CommonToken(type=type) - t.text = text - return t - -CommonTokenFactory.DEFAULT = CommonTokenFactory() \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/CommonTokenStream.py b/runtime/Python2/src/antlr4/CommonTokenStream.py deleted file mode 100644 index 1f3f15a643..0000000000 --- a/runtime/Python2/src/antlr4/CommonTokenStream.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# -# This class extends {@link BufferedTokenStream} with functionality to filter -# token streams to tokens on a particular channel (tokens where -# {@link Token#getChannel} returns a particular value). -# -#

      -# This token stream provides access to all tokens by index or when calling -# methods like {@link #getText}. The channel filtering is only used for code -# accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and -# {@link #LB}.

      -# -#

      -# By default, tokens are placed on the default channel -# ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the -# {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to -# call {@link Lexer#setChannel}. -#

      -# -#

      -# Note: lexer rules which use the {@code ->skip} lexer command or call -# {@link Lexer#skip} do not produce tokens at all, so input text matched by -# such a rule will not be available as part of the token stream, regardless of -# channel.

      -#/ - -from antlr4.BufferedTokenStream import BufferedTokenStream -from antlr4.Token import Token - - -class CommonTokenStream(BufferedTokenStream): - - def __init__(self, lexer, channel=Token.DEFAULT_CHANNEL): - super(CommonTokenStream, self).__init__(lexer) - self.channel = channel - - def adjustSeekIndex(self, i): - return self.nextTokenOnChannel(i, self.channel) - - def LB(self, k): - if k==0 or (self.index-k)<0: - return None - i = self.index - n = 1 - # find k good tokens looking backwards - while n <= k: - # skip off-channel tokens - i = self.previousTokenOnChannel(i - 1, self.channel) - n += 1 - if i < 0: - return None - return self.tokens[i] - - def LT(self, k): - self.lazyInit() - if k == 0: - return None - if k < 0: - return self.LB(-k) - i = self.index - n = 1 # we know tokens[pos] is a good one - # find k good tokens - while n < k: - # skip off-channel tokens, but make sure to not look past EOF - if self.sync(i + 1): - i = self.nextTokenOnChannel(i + 1, self.channel) - n += 1 - return self.tokens[i] - - # Count EOF just once.#/ - def getNumberOfOnChannelTokens(self): - n = 0 - self.fill() - for i in range(0, len(self.tokens)): - t = self.tokens[i] - if t.channel==self.channel: - n += 1 - if t.type==Token.EOF: - break - return n diff --git a/runtime/Python2/src/antlr4/FileStream.py b/runtime/Python2/src/antlr4/FileStream.py deleted file mode 100644 index 99f616a79c..0000000000 --- a/runtime/Python2/src/antlr4/FileStream.py +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# This is an InputStream that is loaded from a file all at once -# when you construct the object. -# - -import codecs -import unittest -from antlr4.InputStream import InputStream - - -class FileStream(InputStream): - - def __init__(self, fileName, encoding='ascii', errors='strict'): - self.fileName = fileName - # read binary to avoid line ending conversion - with open(fileName, 'rb') as file: - bytes = file.read() - data = codecs.decode(bytes, encoding, errors) - super(type(self), self).__init__(data) - - -class TestFileStream(unittest.TestCase): - - def testStream(self): - stream = FileStream("FileStream.py") - self.assertTrue(stream.size>0) diff --git a/runtime/Python2/src/antlr4/InputStream.py b/runtime/Python2/src/antlr4/InputStream.py deleted file mode 100644 index ab12a35da7..0000000000 --- a/runtime/Python2/src/antlr4/InputStream.py +++ /dev/null @@ -1,108 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# -import unittest - - -# -# Vacuum all input from a string and then treat it like a buffer. -# -from antlr4.Token import Token -from antlr4.CodePoints import from_unicode, to_unicode -import sys - -class InputStream (object): - - def __init__(self, data, encoding = "ascii"): - self.name = "" - self.strdata = data if isinstance(data, unicode) else unicode(data, encoding) - self._loadString() - - def _loadString(self): - self._index = 0 - self.data = list(from_unicode(self.strdata)) - self._size = len(self.data) - - @property - def index(self): - return self._index - - @property - def size(self): - return self._size - - # Reset the stream so that it's in the same state it was - # when the object was created *except* the data array is not - # touched. - # - def reset(self): - self._index = 0 - - def consume(self): - if self._index >= self._size: - assert self.LA(1) == Token.EOF - raise Exception("cannot consume EOF") - self._index += 1 - - def LA(self, offset): - if offset==0: - return 0 # undefined - if offset<0: - offset += 1 # e.g., translate LA(-1) to use offset=0 - pos = self._index + offset - 1 - if pos < 0 or pos >= self._size: # invalid - return Token.EOF - return self.data[pos] - - def LT(self, offset): - return self.LA(offset) - - # mark/release do nothing; we have entire buffer - def mark(self): - return -1 - - def release(self, marker): - pass - - # consume() ahead until p==_index; can't just set p=_index as we must - # update line and column. If we seek backwards, just set p - # - def seek(self, _index): - if _index<=self._index: - self._index = _index # just jump; don't update stream state (line, ...) - return - # seek forward - self._index = min(_index, self._size) - - def getText(self, start, stop): - if stop >= self._size: - stop = self._size-1 - if start >= self._size: - return u"" - else: - return to_unicode(self.data[start:stop+1]) - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return self.strdata - - -class TestInputStream(unittest.TestCase): - - def testStream(self): - stream = InputStream("abcde") - self.assertEqual(0, stream.index) - self.assertEqual(5, stream.size) - self.assertEqual(ord("a"), stream.LA(1)) - stream.consume() - self.assertEqual(1, stream.index) - stream.seek(5) - self.assertEqual(Token.EOF, stream.LA(1)) - self.assertEqual("bcd", stream.getText(1, 3)) - stream.reset() - self.assertEqual(0, stream.index) - diff --git a/runtime/Python2/src/antlr4/IntervalSet.py b/runtime/Python2/src/antlr4/IntervalSet.py deleted file mode 100644 index 4502957130..0000000000 --- a/runtime/Python2/src/antlr4/IntervalSet.py +++ /dev/null @@ -1,296 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -from io import StringIO -import unittest -from antlr4.Token import Token - -class Interval(object): - - def __init__(self, start, stop): - self.start = start - self.stop = stop - self.range = xrange(start, stop) - - def __contains__(self, item): - return item in self.range - - def __len__(self): - return self.stop - self.start - - def __iter__(self): - return iter(self.range) - - -class IntervalSet(object): - - def __init__(self): - self.intervals = None # type: list | None - self.readonly = False - - def __iter__(self): - if self.intervals is not None: - for i in self.intervals: - for c in i: - yield c - - def __getitem__(self, item): - i = 0 - for k in self: - if i==item: - return k - else: - i += 1 - return Token.INVALID_TYPE - - def addOne(self, v): - self.addRange(Interval(v, v+1)) - - def addRange(self, v): - if self.intervals is None: - self.intervals = list() - self.intervals.append(v) - else: - # find insert pos - k = 0 - for i in self.intervals: - # distinct range -> insert - if v.stop adjust - elif v.stop==i.start: - self.intervals[k] = Interval(v.start, i.stop) - return - # overlapping range -> adjust and reduce - elif v.start<=i.stop: - self.intervals[k] = Interval(min(i.start,v.start), max(i.stop,v.stop)) - self.reduce(k) - return - k += 1 - # greater than any existing - self.intervals.append(v) - - def addSet(self, other): - if other.intervals is not None: - for i in other.intervals: - self.addRange(i) - return self - - def reduce(self, k): - # only need to reduce if k is not the last - if k= r.stop: - self.intervals.pop(k+1) - self.reduce(k) - elif l.stop >= r.start: - self.intervals[k] = Interval(l.start, r.stop) - self.intervals.pop(k+1) - - def complement(self, start, stop): - result = IntervalSet() - result.addRange(Interval(start,stop+1)) - for i in self.intervals: - result.removeRange(i) - return result - - def __contains__(self, item): - if self.intervals is None: - return False - else: - return any(item in i for i in self.intervals) - - def __len__(self): - return sum(len(i) for i in self.intervals) - - def removeRange(self, v): - if v.start==v.stop-1: - self.removeOne(v.start) - elif self.intervals is not None: - k = 0 - for i in self.intervals: - # intervals are ordered - if v.stop<=i.start: - return - # check for including range, split it - elif v.start>i.start and v.stop=i.stop: - self.intervals.pop(k) - k -= 1 # need another pass - # check for lower boundary - elif v.start1: - buf.write(u"{") - first = True - for i in self.intervals: - for j in i: - if not first: - buf.write(u", ") - buf.write(self.elementName(literalNames, symbolicNames, j)) - first = False - if len(self)>1: - buf.write(u"}") - return buf.getvalue() - - def elementName(self, literalNames, symbolicNames, a): - if a==Token.EOF: - return u"" - elif a==Token.EPSILON: - return u"" - else: - if a": - return literalNames[a] - if a" - - -class TestIntervalSet(unittest.TestCase): - - def testEmpty(self): - s = IntervalSet() - self.assertIsNone(s.intervals) - self.assertFalse(30 in s) - - def testOne(self): - s = IntervalSet() - s.addOne(30) - self.assertTrue(30 in s) - self.assertFalse(29 in s) - self.assertFalse(31 in s) - - def testTwo(self): - s = IntervalSet() - s.addOne(30) - s.addOne(40) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - self.assertFalse(35 in s) - - def testRange(self): - s = IntervalSet() - s.addRange(Interval(30,41)) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - self.assertTrue(35 in s) - - def testDistinct1(self): - s = IntervalSet() - s.addRange(Interval(30,32)) - s.addRange(Interval(40,42)) - self.assertEquals(2,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - self.assertFalse(35 in s) - - def testDistinct2(self): - s = IntervalSet() - s.addRange(Interval(40,42)) - s.addRange(Interval(30,32)) - self.assertEquals(2,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - self.assertFalse(35 in s) - - def testContiguous1(self): - s = IntervalSet() - s.addRange(Interval(30,36)) - s.addRange(Interval(36,41)) - self.assertEquals(1,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - self.assertTrue(35 in s) - - def testContiguous2(self): - s = IntervalSet() - s.addRange(Interval(36,41)) - s.addRange(Interval(30,36)) - self.assertEquals(1,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(40 in s) - - def testOverlapping1(self): - s = IntervalSet() - s.addRange(Interval(30,40)) - s.addRange(Interval(35,45)) - self.assertEquals(1,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(44 in s) - - def testOverlapping2(self): - s = IntervalSet() - s.addRange(Interval(35,45)) - s.addRange(Interval(30,40)) - self.assertEquals(1,len(s.intervals)) - self.assertTrue(30 in s) - self.assertTrue(44 in s) - - def testOverlapping3(self): - s = IntervalSet() - s.addRange(Interval(30,32)) - s.addRange(Interval(40,42)) - s.addRange(Interval(50,52)) - s.addRange(Interval(20,61)) - self.assertEquals(1,len(s.intervals)) - self.assertTrue(20 in s) - self.assertTrue(60 in s) - - def testComplement(self): - s = IntervalSet() - s.addRange(Interval(10,21)) - c = s.complement(1,100) - self.assertTrue(1 in c) - self.assertTrue(100 in c) - self.assertTrue(10 not in c) - self.assertTrue(20 not in c) - - diff --git a/runtime/Python2/src/antlr4/LL1Analyzer.py b/runtime/Python2/src/antlr4/LL1Analyzer.py deleted file mode 100644 index 1c89c492e0..0000000000 --- a/runtime/Python2/src/antlr4/LL1Analyzer.py +++ /dev/null @@ -1,170 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.IntervalSet import IntervalSet, Interval -from antlr4.Token import Token -from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext -from antlr4.atn.ATNConfig import ATNConfig -from antlr4.atn.ATNState import ATNState, RuleStopState -from antlr4.atn.Transition import WildcardTransition, NotSetTransition, AbstractPredicateTransition, RuleTransition - - -class LL1Analyzer (object): - - #* Special value added to the lookahead sets to indicate that we hit - # a predicate during analysis if {@code seeThruPreds==false}. - #/ - HIT_PRED = Token.INVALID_TYPE - - def __init__(self, atn): - self.atn = atn - - #* - # Calculates the SLL(1) expected lookahead set for each outgoing transition - # of an {@link ATNState}. The returned array has one element for each - # outgoing transition in {@code s}. If the closure from transition - # i leads to a semantic predicate before matching a symbol, the - # element at index i of the result will be {@code null}. - # - # @param s the ATN state - # @return the expected symbols for each outgoing transition of {@code s}. - #/ - def getDecisionLookahead(self, s): - if s is None: - return None - - count = len(s.transitions) - look = [] * count - for alt in range(0, count): - look[alt] = set() - lookBusy = set() - seeThruPreds = False # fail to get lookahead upon pred - self._LOOK(s.transition(alt).target, None, PredictionContext.EMPTY, \ - look[alt], lookBusy, set(), seeThruPreds, False) - # Wipe out lookahead for this alternative if we found nothing - # or we had a predicate when we !seeThruPreds - if len(look[alt])==0 or self.HIT_PRED in look[alt]: - look[alt] = None - return look - - #* - # Compute set of tokens that can follow {@code s} in the ATN in the - # specified {@code ctx}. - # - #

      If {@code ctx} is {@code null} and the end of the rule containing - # {@code s} is reached, {@link Token#EPSILON} is added to the result set. - # If {@code ctx} is not {@code null} and the end of the outermost rule is - # reached, {@link Token#EOF} is added to the result set.

      - # - # @param s the ATN state - # @param stopState the ATN state to stop at. This can be a - # {@link BlockEndState} to detect epsilon paths through a closure. - # @param ctx the complete parser context, or {@code null} if the context - # should be ignored - # - # @return The set of tokens that can follow {@code s} in the ATN in the - # specified {@code ctx}. - #/ - def LOOK(self, s, stopState=None, ctx=None): - r = IntervalSet() - seeThruPreds = True # ignore preds; get all lookahead - lookContext = PredictionContextFromRuleContext(s.atn, ctx) if ctx is not None else None - self._LOOK(s, stopState, lookContext, r, set(), set(), seeThruPreds, True) - return r - - #* - # Compute set of tokens that can follow {@code s} in the ATN in the - # specified {@code ctx}. - # - #

      If {@code ctx} is {@code null} and {@code stopState} or the end of the - # rule containing {@code s} is reached, {@link Token#EPSILON} is added to - # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is - # {@code true} and {@code stopState} or the end of the outermost rule is - # reached, {@link Token#EOF} is added to the result set.

      - # - # @param s the ATN state. - # @param stopState the ATN state to stop at. This can be a - # {@link BlockEndState} to detect epsilon paths through a closure. - # @param ctx The outer context, or {@code null} if the outer context should - # not be used. - # @param look The result lookahead set. - # @param lookBusy A set used for preventing epsilon closures in the ATN - # from causing a stack overflow. Outside code should pass - # {@code new HashSet} for this argument. - # @param calledRuleStack A set used for preventing left recursion in the - # ATN from causing a stack overflow. Outside code should pass - # {@code new BitSet()} for this argument. - # @param seeThruPreds {@code true} to true semantic predicates as - # implicitly {@code true} and "see through them", otherwise {@code false} - # to treat semantic predicates as opaque and add {@link #HIT_PRED} to the - # result if one is encountered. - # @param addEOF Add {@link Token#EOF} to the result if the end of the - # outermost context is reached. This parameter has no effect if {@code ctx} - # is {@code null}. - #/ - def _LOOK(self, s, stopState , ctx, look, lookBusy, \ - calledRuleStack, seeThruPreds, addEOF): - c = ATNConfig(s, 0, ctx) - - if c in lookBusy: - return - lookBusy.add(c) - - if s == stopState: - if ctx is None: - look.addOne(Token.EPSILON) - return - elif ctx.isEmpty() and addEOF: - look.addOne(Token.EOF) - return - - if isinstance(s, RuleStopState ): - if ctx is None: - look.addOne(Token.EPSILON) - return - elif ctx.isEmpty() and addEOF: - look.addOne(Token.EOF) - return - - if ctx != PredictionContext.EMPTY: - removed = s.ruleIndex in calledRuleStack - try: - calledRuleStack.discard(s.ruleIndex) - # run thru all possible stack tops in ctx - for i in range(0, len(ctx)): - returnState = self.atn.states[ctx.getReturnState(i)] - self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) - finally: - if removed: - calledRuleStack.add(s.ruleIndex) - return - - for t in s.transitions: - if type(t) == RuleTransition: - if t.target.ruleIndex in calledRuleStack: - continue - - newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber) - - try: - calledRuleStack.add(t.target.ruleIndex) - self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) - finally: - calledRuleStack.remove(t.target.ruleIndex) - elif isinstance(t, AbstractPredicateTransition ): - if seeThruPreds: - self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) - else: - look.addOne(self.HIT_PRED) - elif t.isEpsilon: - self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) - elif type(t) == WildcardTransition: - look.addRange( Interval(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType + 1) ) - else: - set_ = t.label - if set_ is not None: - if isinstance(t, NotSetTransition): - set_ = set_.complement(Token.MIN_USER_TOKEN_TYPE, self.atn.maxTokenType) - look.addSet(set_) diff --git a/runtime/Python2/src/antlr4/Lexer.py b/runtime/Python2/src/antlr4/Lexer.py deleted file mode 100644 index b1484999d8..0000000000 --- a/runtime/Python2/src/antlr4/Lexer.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# A lexer is recognizer that draws input symbols from a character stream. -# lexer grammars result in a subclass of self object. A Lexer object -# uses simplified match() and error recovery mechanisms in the interest -# of speed. -#/ -from __future__ import print_function -from io import StringIO -import sys -from antlr4.CommonTokenFactory import CommonTokenFactory -from antlr4.Recognizer import Recognizer -from antlr4.Token import Token -from antlr4.error.Errors import IllegalStateException, LexerNoViableAltException - -class TokenSource(object): - - pass - - -class Lexer(Recognizer, TokenSource): - - DEFAULT_MODE = 0 - MORE = -2 - SKIP = -3 - - DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL - HIDDEN = Token.HIDDEN_CHANNEL - MIN_CHAR_VALUE = 0x0000 - MAX_CHAR_VALUE = 0x10FFFF - - def __init__(self, input, output=sys.stdout): - super(Lexer, self).__init__() - self._input = input - self._output = output - self._factory = CommonTokenFactory.DEFAULT - self._tokenFactorySourcePair = (self, input) - - self._interp = None # child classes must populate this - - # The goal of all lexer rules/methods is to create a token object. - # self is an instance variable as multiple rules may collaborate to - # create a single token. nextToken will return self object after - # matching lexer rule(s). If you subclass to allow multiple token - # emissions, then set self to the last token to be matched or - # something nonnull so that the auto token emit mechanism will not - # emit another token. - self._token = None - - # What character index in the stream did the current token start at? - # Needed, for example, to get the text for current token. Set at - # the start of nextToken. - self._tokenStartCharIndex = -1 - - # The line on which the first character of the token resides#/ - self._tokenStartLine = -1 - - # The character position of first character within the line#/ - self._tokenStartColumn = -1 - - # Once we see EOF on char stream, next token will be EOF. - # If you have DONE : EOF ; then you see DONE EOF. - self._hitEOF = False - - # The channel number for the current token#/ - self._channel = Token.DEFAULT_CHANNEL - - # The token type for the current token#/ - self._type = Token.INVALID_TYPE - - self._modeStack = [] - self._mode = self.DEFAULT_MODE - - # You can set the text for the current token to override what is in - # the input char buffer. Use setText() or can set self instance var. - #/ - self._text = None - - - def reset(self): - # wack Lexer state variables - if self._input is not None: - self._input.seek(0) # rewind the input - self._token = None - self._type = Token.INVALID_TYPE - self._channel = Token.DEFAULT_CHANNEL - self._tokenStartCharIndex = -1 - self._tokenStartColumn = -1 - self._tokenStartLine = -1 - self._text = None - - self._hitEOF = False - self._mode = Lexer.DEFAULT_MODE - self._modeStack = [] - - self._interp.reset() - - # Return a token from self source; i.e., match a token on the char - # stream. - def nextToken(self): - if self._input is None: - raise IllegalStateException("nextToken requires a non-null input stream.") - - # Mark start location in char stream so unbuffered streams are - # guaranteed at least have text of current token - tokenStartMarker = self._input.mark() - try: - while True: - if self._hitEOF: - self.emitEOF() - return self._token - self._token = None - self._channel = Token.DEFAULT_CHANNEL - self._tokenStartCharIndex = self._input.index - self._tokenStartColumn = self._interp.column - self._tokenStartLine = self._interp.line - self._text = None - continueOuter = False - while True: - self._type = Token.INVALID_TYPE - ttype = self.SKIP - try: - ttype = self._interp.match(self._input, self._mode) - except LexerNoViableAltException as e: - self.notifyListeners(e) # report error - self.recover(e) - if self._input.LA(1)==Token.EOF: - self._hitEOF = True - if self._type == Token.INVALID_TYPE: - self._type = ttype - if self._type == self.SKIP: - continueOuter = True - break - if self._type!=self.MORE: - break - if continueOuter: - continue - if self._token is None: - self.emit() - return self._token - finally: - # make sure we release marker after match or - # unbuffered char stream will keep buffering - self._input.release(tokenStartMarker) - - # Instruct the lexer to skip creating a token for current lexer rule - # and look for another token. nextToken() knows to keep looking when - # a lexer rule finishes with token set to SKIP_TOKEN. Recall that - # if token==null at end of any token rule, it creates one for you - # and emits it. - #/ - def skip(self): - self._type = self.SKIP - - def more(self): - self._type = self.MORE - - def mode(self, m): - self._mode = m - - def pushMode(self, m): - if self._interp.debug: - print("pushMode " + str(m), file=self._output) - self._modeStack.append(self._mode) - self.mode(m) - - def popMode(self): - if len(self._modeStack)==0: - raise Exception("Empty Stack") - if self._interp.debug: - print("popMode back to "+ self._modeStack[:-1], file=self._output) - self.mode( self._modeStack.pop() ) - return self._mode - - # Set the char stream and reset the lexer#/ - @property - def inputStream(self): - return self._input - - @inputStream.setter - def inputStream(self, input): - self._input = None - self._tokenFactorySourcePair = (self, self._input) - self.reset() - self._input = input - self._tokenFactorySourcePair = (self, self._input) - - @property - def sourceName(self): - return self._input.sourceName - - # By default does not support multiple emits per nextToken invocation - # for efficiency reasons. Subclass and override self method, nextToken, - # and getToken (to push tokens into a list and pull from that list - # rather than a single variable as self implementation does). - #/ - def emitToken(self, token): - self._token = token - - # The standard method called to automatically emit a token at the - # outermost lexical rule. The token object should point into the - # char buffer start..stop. If there is a text override in 'text', - # use that to set the token's text. Override self method to emit - # custom Token objects or provide a new factory. - #/ - def emit(self): - t = self._factory.create(self._tokenFactorySourcePair, self._type, self._text, self._channel, self._tokenStartCharIndex, - self.getCharIndex()-1, self._tokenStartLine, self._tokenStartColumn) - self.emitToken(t) - return t - - def emitEOF(self): - cpos = self.column - lpos = self.line - eof = self._factory.create(self._tokenFactorySourcePair, Token.EOF, None, Token.DEFAULT_CHANNEL, self._input.index, - self._input.index-1, lpos, cpos) - self.emitToken(eof) - return eof - - @property - def type(self): - return self._type - - @type.setter - def type(self, type): - self._type = type - - @property - def line(self): - return self._interp.line - - @line.setter - def line(self, line): - self._interp.line = line - - @property - def column(self): - return self._interp.column - - @column.setter - def column(self, column): - self._interp.column = column - - # What is the index of the current character of lookahead?#/ - def getCharIndex(self): - return self._input.index - - # Return the text matched so far for the current token or any - # text override. - @property - def text(self): - if self._text is not None: - return self._text - else: - return self._interp.getText(self._input) - - # Set the complete text of self token; it wipes any previous - # changes to the text. - @text.setter - def text(self, txt): - self._text = txt - - # Return a list of all Token objects in input char stream. - # Forces load of all tokens. Does not include EOF token. - #/ - def getAllTokens(self): - tokens = [] - t = self.nextToken() - while t.type!=Token.EOF: - tokens.append(t) - t = self.nextToken() - return tokens - - def notifyListeners(self, e): - start = self._tokenStartCharIndex - stop = self._input.index - text = self._input.getText(start, stop) - msg = u"token recognition error at: '" + self.getErrorDisplay(text) + u"'" - listener = self.getErrorListenerDispatch() - listener.syntaxError(self, None, self._tokenStartLine, self._tokenStartColumn, msg, e) - - def getErrorDisplay(self, s): - with StringIO() as buf: - for c in s: - buf.write(unicode(self.getErrorDisplayForChar(c))) - return buf.getvalue() - - def getErrorDisplayForChar(self, c): - if ord(c[0])==Token.EOF: - return "" - elif c==u'\n': - return u"\\n" - elif c==u'\t': - return u"\\t" - elif c==u'\r': - return u"\\r" - else: - return c - - def getCharErrorDisplay(self, c): - return u"'" + self.getErrorDisplayForChar(c) + u"'" - - # Lexers can normally match any char in it's vocabulary after matching - # a token, so do the easy thing and just kill a character and hope - # it all works out. You can instead use the rule invocation stack - # to do sophisticated error recovery if you are in a fragment rule. - #/ - def recover(self, re): - if self._input.LA(1) != Token.EOF: - if isinstance(re, LexerNoViableAltException): - # skip a char and try again - self._interp.consume(self._input) - else: - # TODO: Do we lose character or line position information? - self._input.consume() - diff --git a/runtime/Python2/src/antlr4/ListTokenSource.py b/runtime/Python2/src/antlr4/ListTokenSource.py deleted file mode 100644 index 41c62793ee..0000000000 --- a/runtime/Python2/src/antlr4/ListTokenSource.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# Provides an implementation of {@link TokenSource} as a wrapper around a list -# of {@link Token} objects. -# -#

      If the final token in the list is an {@link Token#EOF} token, it will be used -# as the EOF token for every call to {@link #nextToken} after the end of the -# list is reached. Otherwise, an EOF token will be created.

      -# -from antlr4.CommonTokenFactory import CommonTokenFactory -from antlr4.Lexer import TokenSource -from antlr4.Token import Token - - -class ListTokenSource(TokenSource): - - # Constructs a new {@link ListTokenSource} instance from the specified - # collection of {@link Token} objects and source name. - # - # @param tokens The collection of {@link Token} objects to provide as a - # {@link TokenSource}. - # @param sourceName The name of the {@link TokenSource}. If this value is - # {@code null}, {@link #getSourceName} will attempt to infer the name from - # the next {@link Token} (or the previous token if the end of the input has - # been reached). - # - # @exception NullPointerException if {@code tokens} is {@code null} - # - def __init__(self, tokens, sourceName=None): - if tokens is None: - raise ReferenceError("tokens cannot be null") - self.tokens = tokens - self.sourceName = sourceName - # The index into {@link #tokens} of token to return by the next call to - # {@link #nextToken}. The end of the input is indicated by this value - # being greater than or equal to the number of items in {@link #tokens}. - self.pos = 0 - # This field caches the EOF token for the token source. - self.eofToken = None - # This is the backing field for {@link #getTokenFactory} and - self._factory = CommonTokenFactory.DEFAULT - - - # - # {@inheritDoc} - # - @property - def column(self): - if self.pos < len(self.tokens): - return self.tokens[self.pos].column - elif self.eofToken is not None: - return self.eofToken.column - elif len(self.tokens) > 0: - # have to calculate the result from the line/column of the previous - # token, along with the text of the token. - lastToken = self.tokens[len(self.tokens) - 1] - tokenText = lastToken.text - if tokenText is not None: - lastNewLine = tokenText.rfind('\n') - if lastNewLine >= 0: - return len(tokenText) - lastNewLine - 1 - return lastToken.column + lastToken.stop - lastToken.start + 1 - - # only reach this if tokens is empty, meaning EOF occurs at the first - # position in the input - return 0 - - # - # {@inheritDoc} - # - def nextToken(self): - if self.pos >= len(self.tokens): - if self.eofToken is None: - start = -1 - if len(self.tokens) > 0: - previousStop = self.tokens[len(self.tokens) - 1].stop - if previousStop != -1: - start = previousStop + 1 - stop = max(-1, start - 1) - self.eofToken = self._factory.create((self, self.getInputStream()), - Token.EOF, "EOF", Token.DEFAULT_CHANNEL, start, stop, self.line, self.column) - return self.eofToken - t = self.tokens[self.pos] - if self.pos == len(self.tokens) - 1 and t.type == Token.EOF: - self.eofToken = t - self.pos += 1 - return t - - # - # {@inheritDoc} - # - @property - def line(self): - if self.pos < len(self.tokens): - return self.tokens[self.pos].line - elif self.eofToken is not None: - return self.eofToken.line - elif len(self.tokens) > 0: - # have to calculate the result from the line/column of the previous - # token, along with the text of the token. - lastToken = self.tokens[len(self.tokens) - 1] - line = lastToken.line - tokenText = lastToken.text - if tokenText is not None: - line += tokenText.count('\n') - - # if no text is available, assume the token did not contain any newline characters. - return line - - # only reach this if tokens is empty, meaning EOF occurs at the first - # position in the input - return 1 - - # - # {@inheritDoc} - # - def getInputStream(self): - if self.pos < len(self.tokens): - return self.tokens[self.pos].getInputStream() - elif self.eofToken is not None: - return self.eofToken.getInputStream() - elif len(self.tokens) > 0: - return self.tokens[len(self.tokens) - 1].getInputStream() - else: - # no input stream information is available - return None - - # - # {@inheritDoc} - # - def getSourceName(self): - if self.sourceName is not None: - return self.sourceName - inputStream = self.getInputStream() - if inputStream is not None: - return inputStream.getSourceName() - else: - return "List" diff --git a/runtime/Python2/src/antlr4/Parser.py b/runtime/Python2/src/antlr4/Parser.py deleted file mode 100644 index 69abe739b6..0000000000 --- a/runtime/Python2/src/antlr4/Parser.py +++ /dev/null @@ -1,563 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -from __future__ import print_function -from antlr4.error.ErrorStrategy import DefaultErrorStrategy -from antlr4.Recognizer import Recognizer -from antlr4.Token import Token -from antlr4.Lexer import Lexer -from antlr4.atn.ATNDeserializer import ATNDeserializer -from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions -from antlr4.error.Errors import UnsupportedOperationException -from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher -from antlr4.tree.Tree import ParseTreeListener, ErrorNode, TerminalNode -import sys - -class TraceListener(ParseTreeListener): - - def __init__(self, parser): - self._parser = parser - - def enterEveryRule(self, ctx): - print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output) - - def visitTerminal(self, node): - print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()], file=self._parser._output) - - def visitErrorNode(self, node): - pass - - def exitEveryRule(self, ctx): - print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output) - - -# self is all the parsing support code essentially; most of it is error recovery stuff.# -class Parser (Recognizer): - - # self field maps from the serialized ATN string to the deserialized {@link ATN} with - # bypass alternatives. - # - # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions() - # - bypassAltsAtnCache = dict() - - def __init__(self, input, output=sys.stdout): - super(Parser, self).__init__() - # The input stream. - self._input = None - self._output = output - # The error handling strategy for the parser. The default value is a new - # instance of {@link DefaultErrorStrategy}. - self._errHandler = DefaultErrorStrategy() - self._precedenceStack = list() - self._precedenceStack.append(0) - # The {@link ParserRuleContext} object for the currently executing rule. - # self is always non-null during the parsing process. - self._ctx = None - # Specifies whether or not the parser should construct a parse tree during - # the parsing process. The default value is {@code true}. - self.buildParseTrees = True - # When {@link #setTrace}{@code (true)} is called, a reference to the - # {@link TraceListener} is stored here so it can be easily removed in a - # later call to {@link #setTrace}{@code (false)}. The listener itself is - # implemented as a parser listener so self field is not directly used by - # other parser methods. - self._tracer = None - # The list of {@link ParseTreeListener} listeners registered to receive - # events during the parse. - self._parseListeners = None - # The number of syntax errors reported during parsing. self value is - # incremented each time {@link #notifyErrorListeners} is called. - self._syntaxErrors = 0 - self.setInputStream(input) - - # reset the parser's state# - def reset(self): - if self._input is not None: - self._input.seek(0) - self._errHandler.reset(self) - self._ctx = None - self._syntaxErrors = 0 - self.setTrace(False) - self._precedenceStack = list() - self._precedenceStack.append(0) - if self._interp is not None: - self._interp.reset() - - # Match current input symbol against {@code ttype}. If the symbol type - # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are - # called to complete the match process. - # - #

      If the symbol type does not match, - # {@link ANTLRErrorStrategy#recoverInline} is called on the current error - # strategy to attempt recovery. If {@link #getBuildParseTree} is - # {@code true} and the token index of the symbol returned by - # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to - # the parse tree by calling {@link ParserRuleContext#addErrorNode}.

      - # - # @param ttype the token type to match - # @return the matched symbol - # @throws RecognitionException if the current input symbol did not match - # {@code ttype} and the error strategy could not recover from the - # mismatched symbol - - def match(self, ttype): - t = self.getCurrentToken() - if t.type==ttype: - self._errHandler.reportMatch(self) - self.consume() - else: - t = self._errHandler.recoverInline(self) - if self.buildParseTrees and t.tokenIndex==-1: - # we must have conjured up a new token during single token insertion - # if it's not the current symbol - self._ctx.addErrorNode(t) - return t - - # Match current input symbol as a wildcard. If the symbol type matches - # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch} - # and {@link #consume} are called to complete the match process. - # - #

      If the symbol type does not match, - # {@link ANTLRErrorStrategy#recoverInline} is called on the current error - # strategy to attempt recovery. If {@link #getBuildParseTree} is - # {@code true} and the token index of the symbol returned by - # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to - # the parse tree by calling {@link ParserRuleContext#addErrorNode}.

      - # - # @return the matched symbol - # @throws RecognitionException if the current input symbol did not match - # a wildcard and the error strategy could not recover from the mismatched - # symbol - - def matchWildcard(self): - t = self.getCurrentToken() - if t.type > 0: - self._errHandler.reportMatch(self) - self.consume() - else: - t = self._errHandler.recoverInline(self) - if self.buildParseTrees and t.tokenIndex == -1: - # we must have conjured up a new token during single token insertion - # if it's not the current symbol - self._ctx.addErrorNode(t) - - return t - - def getParseListeners(self): - return list() if self._parseListeners is None else self._parseListeners - - # Registers {@code listener} to receive events during the parsing process. - # - #

      To support output-preserving grammar transformations (including but not - # limited to left-recursion removal, automated left-factoring, and - # optimized code generation), calls to listener methods during the parse - # may differ substantially from calls made by - # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In - # particular, rule entry and exit events may occur in a different order - # during the parse than after the parser. In addition, calls to certain - # rule entry methods may be omitted.

      - # - #

      With the following specific exceptions, calls to listener events are - # deterministic, i.e. for identical input the calls to listener - # methods will be the same.

      - # - #
        - #
      • Alterations to the grammar used to generate code may change the - # behavior of the listener calls.
      • - #
      • Alterations to the command line options passed to ANTLR 4 when - # generating the parser may change the behavior of the listener calls.
      • - #
      • Changing the version of the ANTLR Tool used to generate the parser - # may change the behavior of the listener calls.
      • - #
      - # - # @param listener the listener to add - # - # @throws NullPointerException if {@code} listener is {@code null} - # - def addParseListener(self, listener): - if listener is None: - raise ReferenceError("listener") - if self._parseListeners is None: - self._parseListeners = [] - self._parseListeners.append(listener) - - # - # Remove {@code listener} from the list of parse listeners. - # - #

      If {@code listener} is {@code null} or has not been added as a parse - # listener, self method does nothing.

      - # @param listener the listener to remove - # - def removeParseListener(self, listener): - if self._parseListeners is not None: - self._parseListeners.remove(listener) - if len(self._parseListeners)==0: - self._parseListeners = None - - # Remove all parse listeners. - def removeParseListeners(self): - self._parseListeners = None - - # Notify any parse listeners of an enter rule event. - def triggerEnterRuleEvent(self): - if self._parseListeners is not None: - for listener in self._parseListeners: - listener.enterEveryRule(self._ctx) - self._ctx.enterRule(listener) - - # - # Notify any parse listeners of an exit rule event. - # - # @see #addParseListener - # - def triggerExitRuleEvent(self): - if self._parseListeners is not None: - # reverse order walk of listeners - for listener in reversed(self._parseListeners): - self._ctx.exitRule(listener) - listener.exitEveryRule(self._ctx) - - # Gets the number of syntax errors reported during parsing. This value is - # incremented each time {@link #notifyErrorListeners} is called. - # - # @see #notifyErrorListeners - # - def getNumberOfSyntaxErrors(self): - return self._syntaxErrors - - def getTokenFactory(self): - return self._input.tokenSource._factory - - # Tell our token source and error strategy about a new way to create tokens.# - def setTokenFactory(self, factory): - self._input.tokenSource._factory = factory - - # The ATN with bypass alternatives is expensive to create so we create it - # lazily. - # - # @throws UnsupportedOperationException if the current parser does not - # implement the {@link #getSerializedATN()} method. - # - def getATNWithBypassAlts(self): - serializedAtn = self.getSerializedATN() - if serializedAtn is None: - raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.") - result = self.bypassAltsAtnCache.get(serializedAtn, None) - if result is None: - deserializationOptions = ATNDeserializationOptions() - deserializationOptions.generateRuleBypassTransitions = True - result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn) - self.bypassAltsAtnCache[serializedAtn] = result - return result - - # The preferred method of getting a tree pattern. For example, here's a - # sample use: - # - #
      -    # ParseTree t = parser.expr();
      -    # ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
      -    # ParseTreeMatch m = p.match(t);
      -    # String id = m.get("ID");
      -    # 
      - # - def compileParseTreePattern(self, pattern, patternRuleIndex, lexer = None): - if lexer is None: - if self.getTokenStream() is not None: - tokenSource = self.getTokenStream().tokenSource - if isinstance( tokenSource, Lexer ): - lexer = tokenSource - if lexer is None: - raise UnsupportedOperationException("Parser can't discover a lexer to use") - - m = ParseTreePatternMatcher(lexer, self) - return m.compile(pattern, patternRuleIndex) - - - def getInputStream(self): - return self.getTokenStream() - - def setInputStream(self, input): - self.setTokenStream(input) - - def getTokenStream(self): - return self._input - - # Set the token stream and reset the parser.# - def setTokenStream(self, input): - self._input = None - self.reset() - self._input = input - - # Match needs to return the current input symbol, which gets put - # into the label for the associated token ref; e.g., x=ID. - # - def getCurrentToken(self): - return self._input.LT(1) - - def notifyErrorListeners(self, msg, offendingToken = None, e = None): - if offendingToken is None: - offendingToken = self.getCurrentToken() - self._syntaxErrors += 1 - line = offendingToken.line - column = offendingToken.column - listener = self.getErrorListenerDispatch() - listener.syntaxError(self, offendingToken, line, column, msg, e) - - # - # Consume and return the {@linkplain #getCurrentToken current symbol}. - # - #

      E.g., given the following input with {@code A} being the current - # lookahead symbol, self function moves the cursor to {@code B} and returns - # {@code A}.

      - # - #
      -    #  A B
      -    #  ^
      -    # 
      - # - # If the parser is not in error recovery mode, the consumed symbol is added - # to the parse tree using {@link ParserRuleContext#addChild(Token)}, and - # {@link ParseTreeListener#visitTerminal} is called on any parse listeners. - # If the parser is in error recovery mode, the consumed symbol is - # added to the parse tree using - # {@link ParserRuleContext#addErrorNode(Token)}, and - # {@link ParseTreeListener#visitErrorNode} is called on any parse - # listeners. - # - def consume(self): - o = self.getCurrentToken() - if o.type != Token.EOF: - self.getInputStream().consume() - hasListener = self._parseListeners is not None and len(self._parseListeners)>0 - if self.buildParseTrees or hasListener: - if self._errHandler.inErrorRecoveryMode(self): - node = self._ctx.addErrorNode(o) - else: - node = self._ctx.addTokenNode(o) - if hasListener: - for listener in self._parseListeners: - if isinstance(node, ErrorNode): - listener.visitErrorNode(node) - elif isinstance(node, TerminalNode): - listener.visitTerminal(node) - return o - - def addContextToParseTree(self): - # add current context to parent if we have a parent - if self._ctx.parentCtx is not None: - self._ctx.parentCtx.addChild(self._ctx) - - # Always called by generated parsers upon entry to a rule. Access field - # {@link #_ctx} get the current context. - # - def enterRule(self, localctx , state , ruleIndex ): - self.state = state - self._ctx = localctx - self._ctx.start = self._input.LT(1) - if self.buildParseTrees: - self.addContextToParseTree() - if self._parseListeners is not None: - self.triggerEnterRuleEvent() - - def exitRule(self): - self._ctx.stop = self._input.LT(-1) - # trigger event on _ctx, before it reverts to parent - if self._parseListeners is not None: - self.triggerExitRuleEvent() - self.state = self._ctx.invokingState - self._ctx = self._ctx.parentCtx - - def enterOuterAlt(self, localctx, altNum): - localctx.setAltNumber(altNum) - # if we have new localctx, make sure we replace existing ctx - # that is previous child of parse tree - if self.buildParseTrees and self._ctx != localctx: - if self._ctx.parentCtx is not None: - self._ctx.parentCtx.removeLastChild() - self._ctx.parentCtx.addChild(localctx) - self._ctx = localctx - - # Get the precedence level for the top-most precedence rule. - # - # @return The precedence level for the top-most precedence rule, or -1 if - # the parser context is not nested within a precedence rule. - # - def getPrecedence(self): - if len(self._precedenceStack)==0: - return -1 - else: - return self._precedenceStack[-1] - - def enterRecursionRule(self, localctx, state, ruleIndex, precedence): - self.state = state - self._precedenceStack.append(precedence) - self._ctx = localctx - self._ctx.start = self._input.LT(1) - if self._parseListeners is not None: - self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules - - # - # Like {@link #enterRule} but for recursive rules. - # - def pushNewRecursionContext(self, localctx, state, ruleIndex): - previous = self._ctx - previous.parentCtx = localctx - previous.invokingState = state - previous.stop = self._input.LT(-1) - - self._ctx = localctx - self._ctx.start = previous.start - if self.buildParseTrees: - self._ctx.addChild(previous) - - if self._parseListeners is not None: - self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules - - def unrollRecursionContexts(self, parentCtx): - self._precedenceStack.pop() - self._ctx.stop = self._input.LT(-1) - retCtx = self._ctx # save current ctx (return value) - # unroll so _ctx is as it was before call to recursive method - if self._parseListeners is not None: - while self._ctx is not parentCtx: - self.triggerExitRuleEvent() - self._ctx = self._ctx.parentCtx - else: - self._ctx = parentCtx - - # hook into tree - retCtx.parentCtx = parentCtx - - if self.buildParseTrees and parentCtx is not None: - # add return ctx into invoking rule's tree - parentCtx.addChild(retCtx) - - def getInvokingContext(self, ruleIndex): - ctx = self._ctx - while ctx is not None: - if ctx.ruleIndex == ruleIndex: - return ctx - ctx = ctx.parentCtx - return None - - - def precpred(self, localctx , precedence): - return precedence >= self._precedenceStack[-1] - - def inContext(self, context): - # TODO: useful in parser? - return False - - # - # Checks whether or not {@code symbol} can follow the current state in the - # ATN. The behavior of self method is equivalent to the following, but is - # implemented such that the complete context-sensitive follow set does not - # need to be explicitly constructed. - # - #
      -    # return getExpectedTokens().contains(symbol);
      -    # 
      - # - # @param symbol the symbol type to check - # @return {@code true} if {@code symbol} can follow the current state in - # the ATN, otherwise {@code false}. - # - def isExpectedToken(self, symbol): - atn = self._interp.atn - ctx = self._ctx - s = atn.states[self.state] - following = atn.nextTokens(s) - if symbol in following: - return True - if not Token.EPSILON in following: - return False - - while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following: - invokingState = atn.states[ctx.invokingState] - rt = invokingState.transitions[0] - following = atn.nextTokens(rt.followState) - if symbol in following: - return True - ctx = ctx.parentCtx - - if Token.EPSILON in following and symbol == Token.EOF: - return True - else: - return False - - # Computes the set of input symbols which could follow the current parser - # state and context, as given by {@link #getState} and {@link #getContext}, - # respectively. - # - # @see ATN#getExpectedTokens(int, RuleContext) - # - def getExpectedTokens(self): - return self._interp.atn.getExpectedTokens(self.state, self._ctx) - - def getExpectedTokensWithinCurrentRule(self): - atn = self._interp.atn - s = atn.states[self.state] - return atn.nextTokens(s) - - # Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.# - def getRuleIndex(self, ruleName): - ruleIndex = self.getRuleIndexMap().get(ruleName, None) - if ruleIndex is not None: - return ruleIndex - else: - return -1 - - # Return List<String> of the rule names in your parser instance - # leading up to a call to the current rule. You could override if - # you want more details such as the file/line info of where - # in the ATN a rule is invoked. - # - # this is very useful for error messages. - # - def getRuleInvocationStack(self, p=None): - if p is None: - p = self._ctx - stack = list() - while p is not None: - # compute what follows who invoked us - ruleIndex = p.getRuleIndex() - if ruleIndex<0: - stack.append("n/a") - else: - stack.append(self.ruleNames[ruleIndex]) - p = p.parentCtx - return stack - - # For debugging and other purposes.# - def getDFAStrings(self): - return [ unicode(dfa) for dfa in self._interp.decisionToDFA] - - # For debugging and other purposes.# - def dumpDFA(self): - seenOne = False - for i in range(0, len(self._interp.decisionToDFA)): - dfa = self._interp.decisionToDFA[i] - if len(dfa.states)>0: - if seenOne: - print(file=self._output) - print("Decision " + str(dfa.decision) + ":", file=self._output) - print(dfa.toString(self.literalNames, self.symbolicNames), end='', file=self._output) - seenOne = True - - - def getSourceName(self): - return self._input.sourceName - - # During a parse is sometimes useful to listen in on the rule entry and exit - # events as well as token matches. self is for quick and dirty debugging. - # - def setTrace(self, trace): - if not trace: - self.removeParseListener(self._tracer) - self._tracer = None - else: - if self._tracer is not None: - self.removeParseListener(self._tracer) - self._tracer = TraceListener(self) - self.addParseListener(self._tracer) diff --git a/runtime/Python2/src/antlr4/ParserInterpreter.py b/runtime/Python2/src/antlr4/ParserInterpreter.py deleted file mode 100644 index 6f9ef8da8a..0000000000 --- a/runtime/Python2/src/antlr4/ParserInterpreter.py +++ /dev/null @@ -1,163 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# A parser simulator that mimics what ANTLR's generated -# parser code does. A ParserATNSimulator is used to make -# predictions via adaptivePredict but this class moves a pointer through the -# ATN to simulate parsing. ParserATNSimulator just -# makes us efficient rather than having to backtrack, for example. -# -# This properly creates parse trees even for left recursive rules. -# -# We rely on the left recursive rule invocation and special predicate -# transitions to make left recursive rules work. -# -# See TestParserInterpreter for examples. -# -from antlr4 import PredictionContextCache -from antlr4.dfa.DFA import DFA -from antlr4.Lexer import Lexer -from antlr4.Parser import Parser -from antlr4.ParserRuleContext import InterpreterRuleContext -from antlr4.Token import Token -from antlr4.atn.ATNState import StarLoopEntryState, ATNState, LoopEndState -from antlr4.atn.ParserATNSimulator import ParserATNSimulator -from antlr4.atn.Transition import Transition -from antlr4.error.Errors import RecognitionException, UnsupportedOperationException, FailedPredicateException - - -class ParserInterpreter(Parser): - - def __init__(self, grammarFileName, tokenNames, ruleNames, atn, input): - super(ParserInterpreter, self).__init__(input) - self.grammarFileName = grammarFileName - self.atn = atn - self.tokenNames = tokenNames - self.ruleNames = ruleNames - self.decisionToDFA = [ DFA(state) for state in atn.decisionToState ] - self.sharedContextCache = PredictionContextCache() - self._parentContextStack = list() - # identify the ATN states where pushNewRecursionContext must be called - self.pushRecursionContextStates = set() - for state in atn.states: - if not isinstance(state, StarLoopEntryState): - continue - if state.isPrecedenceDecision: - self.pushRecursionContextStates.add(state.stateNumber) - # get atn simulator that knows how to do predictions - self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache) - - # Begin parsing at startRuleIndex# - def parse(self, startRuleIndex): - startRuleStartState = self.atn.ruleToStartState[startRuleIndex] - rootContext = InterpreterRuleContext(None, ATNState.INVALID_STATE_NUMBER, startRuleIndex) - if startRuleStartState.isPrecedenceRule: - self.enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0) - else: - self.enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex) - while True: - p = self.getATNState() - if p.stateType==ATNState.RULE_STOP : - # pop; return from rule - if len(self._ctx)==0: - if startRuleStartState.isPrecedenceRule: - result = self._ctx - parentContext = self._parentContextStack.pop() - self.unrollRecursionContexts(parentContext.a) - return result - else: - self.exitRule() - return rootContext - self.visitRuleStopState(p) - - else: - try: - self.visitState(p) - except RecognitionException as e: - self.state = self.atn.ruleToStopState[p.ruleIndex].stateNumber - self._ctx.exception = e - self._errHandler.reportError(self, e) - self._errHandler.recover(self, e) - - def enterRecursionRule(self, localctx, state, ruleIndex, precedence): - self._parentContextStack.append((self._ctx, localctx.invokingState)) - super(ParserInterpreter, self).enterRecursionRule(localctx, state, ruleIndex, precedence) - - def getATNState(self): - return self.atn.states[self.state] - - def visitState(self, p): - edge = 0 - if len(p.transitions) > 1: - self._errHandler.sync(self) - edge = self._interp.adaptivePredict(self._input, p.decision, self._ctx) - else: - edge = 1 - - transition = p.transitions[edge - 1] - tt = transition.serializationType - if tt==Transition.EPSILON: - - if self.pushRecursionContextStates[p.stateNumber] and not isinstance(transition.target, LoopEndState): - t = self._parentContextStack[-1] - ctx = InterpreterRuleContext(t[0], t[1], self._ctx.ruleIndex) - self.pushNewRecursionContext(ctx, self.atn.ruleToStartState[p.ruleIndex].stateNumber, self._ctx.ruleIndex) - - elif tt==Transition.ATOM: - - self.match(transition.label) - - elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]: - - if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE): - self._errHandler.recoverInline(self) - self.matchWildcard() - - elif tt==Transition.WILDCARD: - - self.matchWildcard() - - elif tt==Transition.RULE: - - ruleStartState = transition.target - ruleIndex = ruleStartState.ruleIndex - ctx = InterpreterRuleContext(self._ctx, p.stateNumber, ruleIndex) - if ruleStartState.isPrecedenceRule: - self.enterRecursionRule(ctx, ruleStartState.stateNumber, ruleIndex, transition.precedence) - else: - self.enterRule(ctx, transition.target.stateNumber, ruleIndex) - - elif tt==Transition.PREDICATE: - - if not self.sempred(self._ctx, transition.ruleIndex, transition.predIndex): - raise FailedPredicateException(self) - - elif tt==Transition.ACTION: - - self.action(self._ctx, transition.ruleIndex, transition.actionIndex) - - elif tt==Transition.PRECEDENCE: - - if not self.precpred(self._ctx, transition.precedence): - msg = "precpred(_ctx, " + str(transition.precedence) + ")" - raise FailedPredicateException(self, msg) - - else: - raise UnsupportedOperationException("Unrecognized ATN transition type.") - - self.state = transition.target.stateNumber - - def visitRuleStopState(self, p): - ruleStartState = self.atn.ruleToStartState[p.ruleIndex] - if ruleStartState.isPrecedenceRule: - parentContext = self._parentContextStack.pop() - self.unrollRecursionContexts(parentContext.a) - self.state = parentContext[1] - else: - self.exitRule() - - ruleTransition = self.atn.states[self.state].transitions[0] - self.state = ruleTransition.followState.stateNumber diff --git a/runtime/Python2/src/antlr4/ParserRuleContext.py b/runtime/Python2/src/antlr4/ParserRuleContext.py deleted file mode 100644 index 0930abf308..0000000000 --- a/runtime/Python2/src/antlr4/ParserRuleContext.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -#* A rule invocation record for parsing. -# -# Contains all of the information about the current rule not stored in the -# RuleContext. It handles parse tree children list, Any ATN state -# tracing, and the default values available for rule indications: -# start, stop, rule index, current alt number, current -# ATN state. -# -# Subclasses made for each rule and grammar track the parameters, -# return values, locals, and labels specific to that rule. These -# are the objects that are returned from rules. -# -# Note text is not an actual field of a rule return value; it is computed -# from start and stop using the input stream's toString() method. I -# could add a ctor to this so that we can pass in and store the input -# stream, but I'm not sure we want to do that. It would seem to be undefined -# to get the .text property anyway if the rule matches tokens from multiple -# input streams. -# -# I do not use getters for fields of objects that are used simply to -# group values such as this aggregate. The getters/setters are there to -# satisfy the superclass interface. - -from antlr4.RuleContext import RuleContext -from antlr4.tree.Tree import TerminalNodeImpl, ErrorNodeImpl, TerminalNode, INVALID_INTERVAL - -class ParserRuleContext(RuleContext): - - def __init__(self, parent = None, invokingStateNumber = None ): - super(ParserRuleContext, self).__init__(parent, invokingStateNumber) - #* If we are debugging or building a parse tree for a visitor, - # we need to track all of the tokens and rule invocations associated - # with this rule's context. This is empty for parsing w/o tree constr. - # operation because we don't the need to track the details about - # how we parse this rule. - #/ - self.children = None - self.start = None - self.stop = None - # The exception that forced this rule to return. If the rule successfully - # completed, this is {@code null}. - self.exception = None - - #* COPY a ctx (I'm deliberately not using copy constructor)#/ - # - # This is used in the generated parser code to flip a generic XContext - # node for rule X to a YContext for alt label Y. In that sense, it is - # not really a generic copy function. - # - # If we do an error sync() at start of a rule, we might add error nodes - # to the generic XContext so this function must copy those nodes to - # the YContext as well else they are lost! - #/ - def copyFrom(self, ctx): - # from RuleContext - self.parentCtx = ctx.parentCtx - self.invokingState = ctx.invokingState - self.children = None - self.start = ctx.start - self.stop = ctx.stop - - # copy any error nodes to alt label node - if ctx.children is not None: - self.children = [] - # reset parent pointer for any error nodes - for child in ctx.children: - if isinstance(child, ErrorNodeImpl): - self.children.append(child) - child.parentCtx = self - - # Double dispatch methods for listeners - def enterRule(self, listener): - pass - - def exitRule(self, listener): - pass - - #* Does not set parent link; other add methods do that#/ - def addChild(self, child): - if self.children is None: - self.children = [] - self.children.append(child) - return child - - #* Used by enterOuterAlt to toss out a RuleContext previously added as - # we entered a rule. If we have # label, we will need to remove - # generic ruleContext object. - #/ - def removeLastChild(self): - if self.children is not None: - del self.children[len(self.children)-1] - - def addTokenNode(self, token): - node = TerminalNodeImpl(token) - self.addChild(node) - node.parentCtx = self - return node - - def addErrorNode(self, badToken): - node = ErrorNodeImpl(badToken) - self.addChild(node) - node.parentCtx = self - return node - - def getChild(self, i, ttype = None): - if ttype is None: - return self.children[i] if len(self.children)>i else None - else: - for child in self.getChildren(): - if not isinstance(child, ttype): - continue - if i==0: - return child - i -= 1 - return None - - def getChildren(self, predicate = None): - if self.children is not None: - for child in self.children: - if predicate is not None and not predicate(child): - continue - yield child - - def getToken(self, ttype, i): - for child in self.getChildren(): - if not isinstance(child, TerminalNode): - continue - if child.symbol.type != ttype: - continue - if i==0: - return child - i -= 1 - return None - - def getTokens(self, ttype ): - if self.getChildren() is None: - return [] - tokens = [] - for child in self.getChildren(): - if not isinstance(child, TerminalNode): - continue - if child.symbol.type != ttype: - continue - tokens.append(child) - return tokens - - def getTypedRuleContext(self, ctxType, i): - return self.getChild(i, ctxType) - - def getTypedRuleContexts(self, ctxType): - children = self.getChildren() - if children is None: - return [] - contexts = [] - for child in children: - if not isinstance(child, ctxType): - continue - contexts.append(child) - return contexts - - def getChildCount(self): - return len(self.children) if self.children else 0 - - def getSourceInterval(self): - if self.start is None or self.stop is None: - return INVALID_INTERVAL - else: - return (self.start.tokenIndex, self.stop.tokenIndex) - - -RuleContext.EMPTY = ParserRuleContext() - -class InterpreterRuleContext(ParserRuleContext): - - def __init__(self, parent, invokingStateNumber, ruleIndex): - super(InterpreterRuleContext, self).__init__(parent, invokingStateNumber) - self.ruleIndex = ruleIndex diff --git a/runtime/Python2/src/antlr4/PredictionContext.py b/runtime/Python2/src/antlr4/PredictionContext.py deleted file mode 100644 index 732f2ad9af..0000000000 --- a/runtime/Python2/src/antlr4/PredictionContext.py +++ /dev/null @@ -1,632 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.RuleContext import RuleContext -from antlr4.error.Errors import IllegalStateException -from io import StringIO - -# dup ParserATNSimulator class var here to avoid circular import; no idea why this can't be in PredictionContext -_trace_atn_sim = False - -class PredictionContext(object): - - # Represents {@code $} in local context prediction, which means wildcard. - # {@code#+x =#}. - #/ - EMPTY = None - - # Represents {@code $} in an array in full context mode, when {@code $} - # doesn't mean wildcard: {@code $ + x = [$,x]}. Here, - # {@code $} = {@link #EMPTY_RETURN_STATE}. - #/ - EMPTY_RETURN_STATE = 0x7FFFFFFF - - globalNodeCount = 1 - id = globalNodeCount - - # Stores the computed hash code of this {@link PredictionContext}. The hash - # code is computed in parts to match the following reference algorithm. - # - #
      -    #  private int referenceHashCode() {
      -    #      int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
      -    #
      -    #      for (int i = 0; i < {@link #size()}; i++) {
      -    #          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
      -    #      }
      -    #
      -    #      for (int i = 0; i < {@link #size()}; i++) {
      -    #          hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
      -    #      }
      -    #
      -    #      hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2# {@link #size()});
      -    #      return hash;
      -    #  }
      -    # 
      - #/ - - def __init__(self, cachedHashCode): - self.cachedHashCode = cachedHashCode - - def __len__(self): - return 0 - - # This means only the {@link #EMPTY} context is in set. - def isEmpty(self): - return self is self.EMPTY - - def hasEmptyPath(self): - return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE - - def getReturnState(self, index): - raise IllegalStateException("illegal!") - - def __hash__(self): - return self.cachedHashCode - - def __str__(self): - return unicode(self) - - -def calculateHashCode(parent, returnState): - return hash("") if parent is None else hash((hash(parent), returnState)) - -def calculateListsHashCode(parents, returnStates ): - h = 0 - for parent, returnState in zip(parents, returnStates): - h = hash((h, calculateHashCode(parent, returnState))) - return h - -# Used to cache {@link PredictionContext} objects. Its used for the shared -# context cash associated with contexts in DFA states. This cache -# can be used for both lexers and parsers. - -class PredictionContextCache(object): - - def __init__(self): - self.cache = dict() - - # Add a context to the cache and return it. If the context already exists, - # return that one instead and do not add a new context to the cache. - # Protect shared cache from unsafe thread access. - # - def add(self, ctx): - if ctx==PredictionContext.EMPTY: - return PredictionContext.EMPTY - existing = self.cache.get(ctx, None) - if existing is not None: - return existing - self.cache[ctx] = ctx - return ctx - - def get(self, ctx): - return self.cache.get(ctx, None) - - def __len__(self): - return len(self.cache) - - -class SingletonPredictionContext(PredictionContext): - - @staticmethod - def create(parent , returnState ): - if returnState == PredictionContext.EMPTY_RETURN_STATE and parent is None: - # someone can pass in the bits of an array ctx that mean $ - return SingletonPredictionContext.EMPTY - else: - return SingletonPredictionContext(parent, returnState) - - def __init__(self, parent, returnState): - hashCode = calculateHashCode(parent, returnState) - super(SingletonPredictionContext, self).__init__(hashCode) - self.parentCtx = parent - self.returnState = returnState - - def __len__(self): - return 1 - - def getParent(self, index): - return self.parentCtx - - def getReturnState(self, index): - return self.returnState - - def __eq__(self, other): - if self is other: - return True - elif other is None: - return False - elif not isinstance(other, SingletonPredictionContext): - return False - else: - return self.returnState == other.returnState and self.parentCtx==other.parentCtx - - def __hash__(self): - return self.cachedHashCode - - def __unicode__(self): - up = "" if self.parentCtx is None else unicode(self.parentCtx) - if len(up)==0: - if self.returnState == self.EMPTY_RETURN_STATE: - return u"$" - else: - return unicode(self.returnState) - else: - return unicode(self.returnState) + u" " + up - - -class EmptyPredictionContext(SingletonPredictionContext): - - def __init__(self): - super(EmptyPredictionContext, self).__init__(None, self.EMPTY_RETURN_STATE) - - def isEmpty(self): - return True - - def __eq__(self, other): - return self is other - - def __hash__(self): - return self.cachedHashCode - - def __unicode__(self): - return "$" - - -PredictionContext.EMPTY = EmptyPredictionContext() - -class ArrayPredictionContext(PredictionContext): - # Parent can be null only if full ctx mode and we make an array - # from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and - # returnState == {@link #EMPTY_RETURN_STATE}. - - def __init__(self, parents, returnStates): - super(ArrayPredictionContext, self).__init__(calculateListsHashCode(parents, returnStates)) - self.parents = parents - self.returnStates = returnStates - - def isEmpty(self): - # since EMPTY_RETURN_STATE can only appear in the last position, we - # don't need to verify that size==1 - return self.returnStates[0]==PredictionContext.EMPTY_RETURN_STATE - - def __len__(self): - return len(self.returnStates) - - def getParent(self, index): - return self.parents[index] - - def getReturnState(self, index): - return self.returnStates[index] - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, ArrayPredictionContext): - return False - elif hash(self) != hash(other): - return False # can't be same if hash is different - else: - return self.returnStates==other.returnStates and self.parents==other.parents - - def __unicode__(self): - if self.isEmpty(): - return "[]" - with StringIO() as buf: - buf.write(u"[") - for i in range(0,len(self.returnStates)): - if i>0: - buf.write(u", ") - if self.returnStates[i]==PredictionContext.EMPTY_RETURN_STATE: - buf.write(u"$") - continue - buf.write(unicode(self.returnStates[i])) - if self.parents[i] is not None: - buf.write(u' ') - buf.write(unicode(self.parents[i])) - else: - buf.write(u"null") - buf.write(u"]") - return buf.getvalue() - - def __hash__(self): - return self.cachedHashCode - - - -# Convert a {@link RuleContext} tree to a {@link PredictionContext} graph. -# Return {@link #EMPTY} if {@code outerContext} is empty or null. -#/ -def PredictionContextFromRuleContext(atn, outerContext=None): - if outerContext is None: - outerContext = RuleContext.EMPTY - - # if we are in RuleContext of start rule, s, then PredictionContext - # is EMPTY. Nobody called us. (if we are empty, return empty) - if outerContext.parentCtx is None or outerContext is RuleContext.EMPTY: - return PredictionContext.EMPTY - - # If we have a parent, convert it to a PredictionContext graph - parent = PredictionContextFromRuleContext(atn, outerContext.parentCtx) - state = atn.states[outerContext.invokingState] - transition = state.transitions[0] - return SingletonPredictionContext.create(parent, transition.followState.stateNumber) - - -def merge(a, b, rootIsWildcard, mergeCache): - - # share same graph if both same - if a==b: - return a - - if isinstance(a, SingletonPredictionContext) and isinstance(b, SingletonPredictionContext): - return mergeSingletons(a, b, rootIsWildcard, mergeCache) - - # At least one of a or b is array - # If one is $ and rootIsWildcard, return $ as# wildcard - if rootIsWildcard: - if isinstance( a, EmptyPredictionContext ): - return a - if isinstance( b, EmptyPredictionContext ): - return b - - # convert singleton so both are arrays to normalize - if isinstance( a, SingletonPredictionContext ): - a = ArrayPredictionContext([a.parentCtx], [a.returnState]) - if isinstance( b, SingletonPredictionContext): - b = ArrayPredictionContext([b.parentCtx], [b.returnState]) - return mergeArrays(a, b, rootIsWildcard, mergeCache) - - -# -# Merge two {@link SingletonPredictionContext} instances. -# -#

      Stack tops equal, parents merge is same; return left graph.
      -#

      -# -#

      Same stack top, parents differ; merge parents giving array node, then -# remainders of those graphs. A new root node is created to point to the -# merged parents.
      -#

      -# -#

      Different stack tops pointing to same parent. Make array node for the -# root where both element in the root point to the same (original) -# parent.
      -#

      -# -#

      Different stack tops pointing to different parents. Make array node for -# the root where each element points to the corresponding original -# parent.
      -#

      -# -# @param a the first {@link SingletonPredictionContext} -# @param b the second {@link SingletonPredictionContext} -# @param rootIsWildcard {@code true} if this is a local-context merge, -# otherwise false to indicate a full-context merge -# @param mergeCache -#/ -def mergeSingletons(a, b, rootIsWildcard, mergeCache): - if mergeCache is not None: - previous = mergeCache.get((a,b), None) - if previous is not None: - return previous - previous = mergeCache.get((b,a), None) - if previous is not None: - return previous - - merged = mergeRoot(a, b, rootIsWildcard) - if merged is not None: - if mergeCache is not None: - mergeCache[(a, b)] = merged - return merged - - if a.returnState==b.returnState: - parent = merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache) - # if parent is same as existing a or b parent or reduced to a parent, return it - if parent == a.parentCtx: - return a # ax + bx = ax, if a=b - if parent == b.parentCtx: - return b # ax + bx = bx, if a=b - # else: ax + ay = a'[x,y] - # merge parents x and y, giving array node with x,y then remainders - # of those graphs. dup a, a' points at merged array - # new joined parent so create new singleton pointing to it, a' - merged = SingletonPredictionContext.create(parent, a.returnState) - if mergeCache is not None: - mergeCache[(a, b)] = merged - return merged - else: # a != b payloads differ - # see if we can collapse parents due to $+x parents if local ctx - singleParent = None - if a is b or (a.parentCtx is not None and a.parentCtx==b.parentCtx): # ax + bx = [a,b]x - singleParent = a.parentCtx - if singleParent is not None: # parents are same - # sort payloads and use same parent - payloads = [ a.returnState, b.returnState ] - if a.returnState > b.returnState: - payloads = [ b.returnState, a.returnState ] - parents = [singleParent, singleParent] - merged = ArrayPredictionContext(parents, payloads) - if mergeCache is not None: - mergeCache[(a, b)] = merged - return merged - # parents differ and can't merge them. Just pack together - # into array; can't merge. - # ax + by = [ax,by] - payloads = [ a.returnState, b.returnState ] - parents = [ a.parentCtx, b.parentCtx ] - if a.returnState > b.returnState: # sort by payload - payloads = [ b.returnState, a.returnState ] - parents = [ b.parentCtx, a.parentCtx ] - merged = ArrayPredictionContext(parents, payloads) - if mergeCache is not None: - mergeCache[(a, b)] = merged - return merged - - -# -# Handle case where at least one of {@code a} or {@code b} is -# {@link #EMPTY}. In the following diagrams, the symbol {@code $} is used -# to represent {@link #EMPTY}. -# -#

      Local-Context Merges

      -# -#

      These local-context merge operations are used when {@code rootIsWildcard} -# is true.

      -# -#

      {@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
      -#

      -# -#

      {@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is -# {@code #EMPTY}; return left graph.
      -#

      -# -#

      Special case of last merge if local context.
      -#

      -# -#

      Full-Context Merges

      -# -#

      These full-context merge operations are used when {@code rootIsWildcard} -# is false.

      -# -#

      -# -#

      Must keep all contexts; {@link #EMPTY} in array is a special value (and -# null parent).
      -#

      -# -#

      -# -# @param a the first {@link SingletonPredictionContext} -# @param b the second {@link SingletonPredictionContext} -# @param rootIsWildcard {@code true} if this is a local-context merge, -# otherwise false to indicate a full-context merge -#/ -def mergeRoot(a, b, rootIsWildcard): - if rootIsWildcard: - if a == PredictionContext.EMPTY: - return PredictionContext.EMPTY ## + b =# - if b == PredictionContext.EMPTY: - return PredictionContext.EMPTY # a +# =# - else: - if a == PredictionContext.EMPTY and b == PredictionContext.EMPTY: - return PredictionContext.EMPTY # $ + $ = $ - elif a == PredictionContext.EMPTY: # $ + x = [$,x] - payloads = [ b.returnState, PredictionContext.EMPTY_RETURN_STATE ] - parents = [ b.parentCtx, None ] - return ArrayPredictionContext(parents, payloads) - elif b == PredictionContext.EMPTY: # x + $ = [$,x] ($ is always first if present) - payloads = [ a.returnState, PredictionContext.EMPTY_RETURN_STATE ] - parents = [ a.parentCtx, None ] - return ArrayPredictionContext(parents, payloads) - return None - - -# -# Merge two {@link ArrayPredictionContext} instances. -# -#

      Different tops, different parents.
      -#

      -# -#

      Shared top, same parents.
      -#

      -# -#

      Shared top, different parents.
      -#

      -# -#

      Shared top, all shared parents.
      -#

      -# -#

      Equal tops, merge parents and reduce top to -# {@link SingletonPredictionContext}.
      -#

      -#/ -def mergeArrays(a, b, rootIsWildcard, mergeCache): - if mergeCache is not None: - previous = mergeCache.get((a,b), None) - if previous is not None: - if _trace_atn_sim: print("mergeArrays a="+str(a)+",b="+str(b)+" -> previous") - return previous - previous = mergeCache.get((b,a), None) - if previous is not None: - if _trace_atn_sim: print("mergeArrays a="+str(a)+",b="+str(b)+" -> previous") - return previous - - # merge sorted payloads a + b => M - i = 0 # walks a - j = 0 # walks b - k = 0 # walks target M array - - mergedReturnStates = [None] * (len(a.returnStates) + len( b.returnStates)) - mergedParents = [None] * len(mergedReturnStates) - # walk and merge to yield mergedParents, mergedReturnStates - while i ax - if bothDollars or ax_ax: - mergedParents[k] = a_parent # choose left - mergedReturnStates[k] = payload - else: # ax+ay -> a'[x,y] - mergedParent = merge(a_parent, b_parent, rootIsWildcard, mergeCache) - mergedParents[k] = mergedParent - mergedReturnStates[k] = payload - i += 1 # hop over left one as usual - j += 1 # but also skip one in right side since we merge - elif a.returnStates[i] a, copy b[j] to M - mergedParents[k] = b_parent - mergedReturnStates[k] = b.returnStates[j] - j += 1 - k += 1 - - # copy over any payloads remaining in either array - if i < len(a.returnStates): - for p in range(i, len(a.returnStates)): - mergedParents[k] = a.parents[p] - mergedReturnStates[k] = a.returnStates[p] - k += 1 - else: - for p in range(j, len(b.returnStates)): - mergedParents[k] = b.parents[p] - mergedReturnStates[k] = b.returnStates[p] - k += 1 - - # trim merged if we combined a few that had same stack tops - if k < len(mergedParents): # write index < last position; trim - if k == 1: # for just one merged element, return singleton top - merged = SingletonPredictionContext.create(mergedParents[0], mergedReturnStates[0]) - if mergeCache is not None: - mergeCache[(a,b)] = merged - return merged - mergedParents = mergedParents[0:k] - mergedReturnStates = mergedReturnStates[0:k] - - merged = ArrayPredictionContext(mergedParents, mergedReturnStates) - - # if we created same array as a or b, return that instead - # TODO: track whether this is possible above during merge sort for speed - if merged==a: - if mergeCache is not None: - mergeCache[(a,b)] = a - if _trace_atn_sim: print("mergeArrays a="+str(a)+",b="+str(b)+" -> a") - return a - if merged==b: - if mergeCache is not None: - mergeCache[(a,b)] = b - if _trace_atn_sim: print("mergeArrays a="+str(a)+",b="+str(b)+" -> b") - return b - combineCommonParents(mergedParents) - - if mergeCache is not None: - mergeCache[(a,b)] = merged - - if _trace_atn_sim: print("mergeArrays a="+str(a)+",b="+str(b)+" -> "+str(M)) - - return merged - - -# -# Make pass over all M {@code parents}; merge any {@code equals()} -# ones. -#/ -def combineCommonParents(parents): - uniqueParents = dict() - - for p in range(0, len(parents)): - parent = parents[p] - if uniqueParents.get(parent, None) is None: - uniqueParents[parent] = parent - - for p in range(0, len(parents)): - parents[p] = uniqueParents[parents[p]] - -def getCachedPredictionContext(context, contextCache, visited): - if context.isEmpty(): - return context - existing = visited.get(context) - if existing is not None: - return existing - existing = contextCache.get(context) - if existing is not None: - visited[context] = existing - return existing - changed = False - parents = [None] * len(context) - for i in range(0, len(parents)): - parent = getCachedPredictionContext(context.getParent(i), contextCache, visited) - if changed or parent is not context.getParent(i): - if not changed: - parents = [context.getParent(j) for j in range(len(context))] - changed = True - parents[i] = parent - if not changed: - contextCache.add(context) - visited[context] = context - return context - - updated = None - if len(parents) == 0: - updated = PredictionContext.EMPTY - elif len(parents) == 1: - updated = SingletonPredictionContext.create(parents[0], context.getReturnState(0)) - else: - updated = ArrayPredictionContext(parents, context.returnStates) - - contextCache.add(updated) - visited[updated] = updated - visited[context] = updated - - return updated - - -# # extra structures, but cut/paste/morphed works, so leave it. -# # seems to do a breadth-first walk -# public static List getAllNodes(PredictionContext context) { -# Map visited = -# new IdentityHashMap(); -# Deque workList = new ArrayDeque(); -# workList.add(context); -# visited.put(context, context); -# List nodes = new ArrayList(); -# while (!workList.isEmpty()) { -# PredictionContext current = workList.pop(); -# nodes.add(current); -# for (int i = 0; i < current.size(); i++) { -# PredictionContext parent = current.getParent(i); -# if ( parent!=null && visited.put(parent, parent) == null) { -# workList.push(parent); -# } -# } -# } -# return nodes; -# } - -# ter's recursive version of Sam's getAllNodes() -def getAllContextNodes(context, nodes=None, visited=None): - if nodes is None: - nodes = list() - return getAllContextNodes(context, nodes, visited) - elif visited is None: - visited = dict() - return getAllContextNodes(context, nodes, visited) - else: - if context is None or visited.get(context, None) is not None: - return nodes - visited.put(context, context) - nodes.add(context) - for i in range(0, len(context)): - getAllContextNodes(context.getParent(i), nodes, visited) - return nodes - diff --git a/runtime/Python2/src/antlr4/Recognizer.py b/runtime/Python2/src/antlr4/Recognizer.py deleted file mode 100644 index 605ee08b38..0000000000 --- a/runtime/Python2/src/antlr4/Recognizer.py +++ /dev/null @@ -1,141 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# -from __builtin__ import unicode -from antlr4.Token import Token -from antlr4.error.ErrorListener import ProxyErrorListener, ConsoleErrorListener - -class Recognizer(object): - - tokenTypeMapCache = dict() - ruleIndexMapCache = dict() - - def __init__(self): - self._listeners = [ ConsoleErrorListener.INSTANCE ] - self._interp = None - self._stateNumber = -1 - - def extractVersion(self, version): - pos = version.find(".") - major = version[0:pos] - version = version[pos+1:] - pos = version.find(".") - if pos==-1: - pos = version.find("-") - if pos==-1: - pos = len(version) - minor = version[0:pos] - return major, minor - - def checkVersion(self, toolVersion): - runtimeVersion = "4.13.0" - rvmajor, rvminor = self.extractVersion(runtimeVersion) - tvmajor, tvminor = self.extractVersion(toolVersion) - if rvmajor!=tvmajor or rvminor!=tvminor: - print("ANTLR runtime and generated code versions disagree: "+runtimeVersion+"!="+toolVersion) - - def addErrorListener(self, listener): - self._listeners.append(listener) - - def removeErrorListener(self, listener): - self._listeners.remove(listener) - - def removeErrorListeners(self): - self._listeners = [] - - def getTokenTypeMap(self): - tokenNames = self.getTokenNames() - if tokenNames is None: - from antlr4.error.Errors import UnsupportedOperationException - raise UnsupportedOperationException("The current recognizer does not provide a list of token names.") - result = self.tokenTypeMapCache.get(tokenNames, None) - if result is None: - result = zip( tokenNames, range(0, len(tokenNames))) - result["EOF"] = Token.EOF - self.tokenTypeMapCache[tokenNames] = result - return result - - # Get a map from rule names to rule indexes. - # - #

      Used for XPath and tree pattern compilation.

      - # - def getRuleIndexMap(self): - ruleNames = self.getRuleNames() - if ruleNames is None: - from antlr4.error.Errors import UnsupportedOperationException - raise UnsupportedOperationException("The current recognizer does not provide a list of rule names.") - result = self.ruleIndexMapCache.get(ruleNames, None) - if result is None: - result = zip( ruleNames, range(0, len(ruleNames))) - self.ruleIndexMapCache[ruleNames] = result - return result - - def getTokenType(self, tokenName): - ttype = self.getTokenTypeMap().get(tokenName, None) - if ttype is not None: - return ttype - else: - return Token.INVALID_TYPE - - - # What is the error header, normally line/character position information?# - def getErrorHeader(self, e): - line = e.getOffendingToken().line - column = e.getOffendingToken().column - return u"line " + unicode(line) + u":" + unicode(column) - - - # How should a token be displayed in an error message? The default - # is to display just the text, but during development you might - # want to have a lot of information spit out. Override in that case - # to use t.toString() (which, for CommonToken, dumps everything about - # the token). This is better than forcing you to override a method in - # your token objects because you don't have to go modify your lexer - # so that it creates a new Java type. - # - # @deprecated This method is not called by the ANTLR 4 Runtime. Specific - # implementations of {@link ANTLRErrorStrategy} may provide a similar - # feature when necessary. For example, see - # {@link DefaultErrorStrategy#getTokenErrorDisplay}. - # - def getTokenErrorDisplay(self, t): - if t is None: - return u"" - s = t.text - if s is None: - if t.type==Token.EOF: - s = u"" - else: - s = u"<" + unicode(t.type) + u">" - s = s.replace(u"\n",u"\\n") - s = s.replace(u"\r",u"\\r") - s = s.replace(u"\t",u"\\t") - return u"'" + s + u"'" - - def getErrorListenerDispatch(self): - return ProxyErrorListener(self._listeners) - - # subclass needs to override these if there are sempreds or actions - # that the ATN interp needs to execute - def sempred(self, localctx, ruleIndex, actionIndex): - return True - - def precpred(self, localctx , precedence): - return True - - @property - def state(self): - return self._stateNumber - - # Indicate that the recognizer has changed internal state that is - # consistent with the ATN state passed in. This way we always know - # where we are in the ATN as the parser goes along. The rule - # context objects form a stack that lets us see the stack of - # invoking rules. Combine this and we have complete ATN - # configuration information. - - @state.setter - def state(self, atnState): - self._stateNumber = atnState diff --git a/runtime/Python2/src/antlr4/RuleContext.py b/runtime/Python2/src/antlr4/RuleContext.py deleted file mode 100644 index c8b5481659..0000000000 --- a/runtime/Python2/src/antlr4/RuleContext.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - - -# A rule context is a record of a single rule invocation. It knows -# which context invoked it, if any. If there is no parent context, then -# naturally the invoking state is not valid. The parent link -# provides a chain upwards from the current rule invocation to the root -# of the invocation tree, forming a stack. We actually carry no -# information about the rule associated with this context (except -# when parsing). We keep only the state number of the invoking state from -# the ATN submachine that invoked this. Contrast this with the s -# pointer inside ParserRuleContext that tracks the current state -# being "executed" for the current rule. -# -# The parent contexts are useful for computing lookahead sets and -# getting error information. -# -# These objects are used during parsing and prediction. -# For the special case of parsers, we use the subclass -# ParserRuleContext. -# -# @see ParserRuleContext -#/ -from io import StringIO -from antlr4.tree.Tree import RuleNode, INVALID_INTERVAL -from antlr4.tree.Trees import Trees -from antlr4.atn.ATN import ATN - -class RuleContext(RuleNode): - - EMPTY = None - - def __init__(self, parent=None, invokingState=-1): - super(RuleContext, self).__init__() - # What context invoked this rule? - self.parentCtx = parent - # What state invoked the rule associated with this context? - # The "return address" is the followState of invokingState - # If parent is null, this should be -1. - self.invokingState = invokingState - - - def depth(self): - n = 0 - p = self - while p is not None: - p = p.parentCtx - n += 1 - return n - - # A context is empty if there is no invoking state; meaning nobody call - # current context. - def isEmpty(self): - return self.invokingState == -1 - - # satisfy the ParseTree / SyntaxTree interface - - def getSourceInterval(self): - return INVALID_INTERVAL - - def getRuleContext(self): - return self - - def getPayload(self): - return self - - # Return the combined text of all child nodes. This method only considers - # tokens which have been added to the parse tree. - #

      - # Since tokens on hidden channels (e.g. whitespace or comments) are not - # added to the parse trees, they will not appear in the output of this - # method. - #/ - def getText(self): - if self.getChildCount() == 0: - return u"" - with StringIO() as builder: - for child in self.getChildren(): - builder.write(child.getText()) - return builder.getvalue() - - def getRuleIndex(self): - return -1 - - # For rule associated with this parse tree internal node, return - # the outer alternative number used to match the input. Default - # implementation does not compute nor store this alt num. Create - # a subclass of ParserRuleContext with backing field and set - # option contextSuperClass. - # to set it. - def getAltNumber(self): - return ATN.INVALID_ALT_NUMBER - - # Set the outer alternative number for this context node. Default - # implementation does nothing to avoid backing field overhead for - # trees that don't need it. Create - # a subclass of ParserRuleContext with backing field and set - # option contextSuperClass. - def setAltNumber(self, altNumber): - pass - - def getChild(self, i): - return None - - def getChildCount(self): - return 0 - - def getChildren(self): - for c in []: - yield c - - def accept(self, visitor): - return visitor.visitChildren(self) - - # # Call this method to view a parse tree in a dialog box visually.#/ - # public Future inspect(@Nullable Parser parser) { - # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; - # return inspect(ruleNames); - # } - # - # public Future inspect(@Nullable List ruleNames) { - # TreeViewer viewer = new TreeViewer(ruleNames, this); - # return viewer.open(); - # } - # - # # Save this tree in a postscript file#/ - # public void save(@Nullable Parser parser, String fileName) - # throws IOException, PrintException - # { - # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; - # save(ruleNames, fileName); - # } - # - # # Save this tree in a postscript file using a particular font name and size#/ - # public void save(@Nullable Parser parser, String fileName, - # String fontName, int fontSize) - # throws IOException - # { - # List ruleNames = parser != null ? Arrays.asList(parser.getRuleNames()) : null; - # save(ruleNames, fileName, fontName, fontSize); - # } - # - # # Save this tree in a postscript file#/ - # public void save(@Nullable List ruleNames, String fileName) - # throws IOException, PrintException - # { - # Trees.writePS(this, ruleNames, fileName); - # } - # - # # Save this tree in a postscript file using a particular font name and size#/ - # public void save(@Nullable List ruleNames, String fileName, - # String fontName, int fontSize) - # throws IOException - # { - # Trees.writePS(this, ruleNames, fileName, fontName, fontSize); - # } - # - # # Print out a whole tree, not just a node, in LISP format - # # (root child1 .. childN). Print just a node if this is a leaf. - # # We have to know the recognizer so we can get rule names. - # #/ - # @Override - # public String toStringTree(@Nullable Parser recog) { - # return Trees.toStringTree(this, recog); - # } - # - # Print out a whole tree, not just a node, in LISP format - # (root child1 .. childN). Print just a node if this is a leaf. - # - def toStringTree(self, ruleNames=None, recog=None): - return Trees.toStringTree(self, ruleNames=ruleNames, recog=recog) - # } - # - # @Override - # public String toStringTree() { - # return toStringTree((List)null); - # } - # - def __unicode__(self): - return self.toString(None, None) - - # @Override - # public String toString() { - # return toString((List)null, (RuleContext)null); - # } - # - # public final String toString(@Nullable Recognizer recog) { - # return toString(recog, ParserRuleContext.EMPTY); - # } - # - # public final String toString(@Nullable List ruleNames) { - # return toString(ruleNames, null); - # } - # - # // recog null unless ParserRuleContext, in which case we use subclass toString(...) - # public String toString(@Nullable Recognizer recog, @Nullable RuleContext stop) { - # String[] ruleNames = recog != null ? recog.getRuleNames() : null; - # List ruleNamesList = ruleNames != null ? Arrays.asList(ruleNames) : null; - # return toString(ruleNamesList, stop); - # } - - def toString(self, ruleNames, stop): - with StringIO() as buf: - p = self - buf.write(u"[") - while p is not None and p is not stop: - if ruleNames is None: - if not p.isEmpty(): - buf.write(unicode(p.invokingState)) - else: - ri = p.getRuleIndex() - ruleName = ruleNames[ri] if ri >= 0 and ri < len(ruleNames) else unicode(ri) - buf.write(ruleName) - - if p.parentCtx is not None and (ruleNames is not None or not p.parentCtx.isEmpty()): - buf.write(u" ") - - p = p.parentCtx - - buf.write(u"]") - return buf.getvalue() - diff --git a/runtime/Python2/src/antlr4/StdinStream.py b/runtime/Python2/src/antlr4/StdinStream.py deleted file mode 100644 index a3a03aa615..0000000000 --- a/runtime/Python2/src/antlr4/StdinStream.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# This is an InputStream that is loaded from stdin all at once -# when you construct the object. -# - -import codecs -import sys -from antlr4.InputStream import InputStream - -class StdinStream(InputStream): - - def __init__(self, encoding='ascii', errors='strict'): - bytes = sys.stdin.read() - data = codecs.decode(bytes, encoding, errors) - super(type(self), self).__init__(data) diff --git a/runtime/Python2/src/antlr4/Token.py b/runtime/Python2/src/antlr4/Token.py deleted file mode 100644 index 2ab4991717..0000000000 --- a/runtime/Python2/src/antlr4/Token.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# A token has properties: text, type, line, character position in the line -# (so we can ignore tabs), token channel, index, and source from which -# we obtained this token. -from io import StringIO - - -class Token (object): - - INVALID_TYPE = 0 - - # During lookahead operations, this "token" signifies we hit rule end ATN state - # and did not follow it despite needing to. - EPSILON = -2 - - MIN_USER_TOKEN_TYPE = 1 - - EOF = -1 - - # All tokens go to the parser (unless skip() is called in that rule) - # on a particular "channel". The parser tunes to a particular channel - # so that whitespace etc... can go to the parser on a "hidden" channel. - - DEFAULT_CHANNEL = 0 - - # Anything on different channel than DEFAULT_CHANNEL is not parsed - # by parser. - - HIDDEN_CHANNEL = 1 - - def __init__(self): - self.source = None - self.type = None # token type of the token - self.channel = None # The parser ignores everything not on DEFAULT_CHANNEL - self.start = None # optional; return -1 if not implemented. - self.stop = None # optional; return -1 if not implemented. - self.tokenIndex = None # from 0..n-1 of the token object in the input stream - self.line = None # line=1..n of the 1st character - self.column = None # beginning of the line at which it occurs, 0..n-1 - self._text = None # text of the token. - - @property - def text(self): - return self._text - - # Explicitly set the text for this token. If {code text} is not - # {@code null}, then {@link #getText} will return this value rather than - # extracting the text from the input. - # - # @param text The explicit text of the token, or {@code null} if the text - # should be obtained from the input along with the start and stop indexes - # of the token. - - @text.setter - def text(self, text): - self._text = text - - - def getTokenSource(self): - return self.source[0] - - def getInputStream(self): - return self.source[1] - - def __str__(self): - return unicode(self) - - -class CommonToken(Token): - - - # An empty {@link Pair} which is used as the default value of - # {@link #source} for tokens that do not have a source. - EMPTY_SOURCE = (None, None) - - def __init__(self, source = EMPTY_SOURCE, type = None, channel=Token.DEFAULT_CHANNEL, start=-1, stop=-1): - super(CommonToken, self).__init__() - self.source = source - self.type = type - self.channel = channel - self.start = start - self.stop = stop - self.tokenIndex = -1 - if source[0] is not None: - self.line = source[0].line - self.column = source[0].column - else: - self.column = -1 - - # Constructs a new {@link CommonToken} as a copy of another {@link Token}. - # - #

      - # If {@code oldToken} is also a {@link CommonToken} instance, the newly - # constructed token will share a reference to the {@link #text} field and - # the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will - # be assigned the result of calling {@link #getText}, and {@link #source} - # will be constructed from the result of {@link Token#getTokenSource} and - # {@link Token#getInputStream}.

      - # - # @param oldToken The token to copy. - # - def clone(self): - t = CommonToken(self.source, self.type, self.channel, self.start, self.stop) - t.tokenIndex = self.tokenIndex - t.line = self.line - t.column = self.column - t.text = self.text - return t - - @property - def text(self): - if self._text is not None: - return self._text - input = self.getInputStream() - if input is None: - return None - n = input.size - if self.start < n and self.stop < n: - return input.getText(self.start, self.stop) - else: - return u"" - - @text.setter - def text(self, text): - self._text = text - - def __unicode__(self): - with StringIO() as buf: - buf.write(u"[@") - buf.write(unicode(self.tokenIndex)) - buf.write(u",") - buf.write(unicode(self.start)) - buf.write(u":") - buf.write(unicode(self.stop)) - buf.write(u"='") - txt = self.text - if txt is not None: - txt = txt.replace(u"\n",u"\\n") - txt = txt.replace(u"\r",u"\\r") - txt = txt.replace(u"\t",u"\\t") - else: - txt = u"" - buf.write(txt) - buf.write(u"',<") - buf.write(unicode(self.type)) - buf.write(u">") - if self.channel > 0: - buf.write(u",channel=") - buf.write(unicode(self.channel)) - buf.write(u",") - buf.write(unicode(self.line)) - buf.write(u":") - buf.write(unicode(self.column)) - buf.write(u"]") - return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/TokenStreamRewriter.py b/runtime/Python2/src/antlr4/TokenStreamRewriter.py deleted file mode 100644 index 5e7ec68270..0000000000 --- a/runtime/Python2/src/antlr4/TokenStreamRewriter.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -from StringIO import StringIO -from antlr4.Token import Token - -from antlr4.CommonTokenStream import CommonTokenStream - -from antlr4.IntervalSet import Interval - - -class TokenStreamRewriter(object): - DEFAULT_PROGRAM_NAME = "default" - PROGRAM_INIT_SIZE = 100 - MIN_TOKEN_INDEX = 0 - - def __init__(self, tokens): - """ - :type tokens: antlr4.BufferedTokenStream.BufferedTokenStream - :param tokens: - :return: - """ - super(TokenStreamRewriter, self).__init__() - self.tokens = tokens - self.programs = {self.DEFAULT_PROGRAM_NAME: []} - self.lastRewriteTokenIndexes = {} - - def getTokenStream(self): - return self.tokens - - def rollback(self, instruction_index, program_name): - ins = self.programs.get(program_name, None) - if ins: - self.programs[program_name] = ins[self.MIN_TOKEN_INDEX: instruction_index] - - def deleteProgram(self, program_name=DEFAULT_PROGRAM_NAME): - self.rollback(self.MIN_TOKEN_INDEX, program_name) - - def insertAfterToken(self, token, text, program_name=DEFAULT_PROGRAM_NAME): - self.insertAfter(token.tokenIndex, text, program_name) - - def insertAfter(self, index, text, program_name=DEFAULT_PROGRAM_NAME): - op = self.InsertAfterOp(self.tokens, index + 1, text) - rewrites = self.getProgram(program_name) - op.instructionIndex = len(rewrites) - rewrites.append(op) - - def insertBeforeIndex(self, index, text): - self.insertBefore(self.DEFAULT_PROGRAM_NAME, index, text) - - def insertBeforeToken(self, token, text, program_name=DEFAULT_PROGRAM_NAME): - self.insertBefore(program_name, token.tokenIndex, text) - - def insertBefore(self, program_name, index, text): - op = self.InsertBeforeOp(self.tokens, index, text) - rewrites = self.getProgram(program_name) - op.instructionIndex = len(rewrites) - rewrites.append(op) - - def replaceIndex(self, index, text): - self.replace(self.DEFAULT_PROGRAM_NAME, index, index, text) - - def replaceRange(self, from_idx, to_idx, text): - self.replace(self.DEFAULT_PROGRAM_NAME, from_idx, to_idx, text) - - def replaceSingleToken(self, token, text): - self.replace(self.DEFAULT_PROGRAM_NAME, token.tokenIndex, token.tokenIndex, text) - - def replaceRangeTokens(self, from_token, to_token, text, program_name=DEFAULT_PROGRAM_NAME): - self.replace(program_name, from_token.tokenIndex, to_token.tokenIndex, text) - - def replace(self, program_name, from_idx, to_idx, text): - if any((from_idx > to_idx, from_idx < 0, to_idx < 0, to_idx >= len(self.tokens.tokens))): - raise ValueError( - 'replace: range invalid: {}..{}(size={})'.format(from_idx, to_idx, len(self.tokens.tokens))) - op = self.ReplaceOp(from_idx, to_idx, self.tokens, text) - rewrites = self.getProgram(program_name) - op.instructionIndex = len(rewrites) - rewrites.append(op) - - def deleteToken(self, token): - self.delete(self.DEFAULT_PROGRAM_NAME, token, token) - - def deleteIndex(self, index): - self.delete(self.DEFAULT_PROGRAM_NAME, index, index) - - def delete(self, program_name, from_idx, to_idx): - if isinstance(from_idx, Token): - self.replace(program_name, from_idx.tokenIndex, to_idx.tokenIndex, "") - else: - self.replace(program_name, from_idx, to_idx, "") - - def lastRewriteTokenIndex(self, program_name=DEFAULT_PROGRAM_NAME): - return self.lastRewriteTokenIndexes.get(program_name, -1) - - def setLastRewriteTokenIndex(self, program_name, i): - self.lastRewriteTokenIndexes[program_name] = i - - def getProgram(self, program_name): - return self.programs.setdefault(program_name, []) - - def getDefaultText(self): - return self.getText(self.DEFAULT_PROGRAM_NAME, 0, len(self.tokens.tokens) - 1) - - def getText(self, program_name, start, stop): - """ - :return: the text in tokens[start, stop](closed interval) - """ - rewrites = self.programs.get(program_name) - - # ensure start/end are in range - if stop > len(self.tokens.tokens) - 1: - stop = len(self.tokens.tokens) - 1 - if start < 0: - start = 0 - - # if no instructions to execute - if not rewrites: return self.tokens.getText(start, stop) - buf = StringIO() - indexToOp = self._reduceToSingleOperationPerIndex(rewrites) - i = start - while all((i <= stop, i < len(self.tokens.tokens))): - op = indexToOp.pop(i, None) - token = self.tokens.get(i) - if op is None: - if token.type != Token.EOF: buf.write(token.text) - i += 1 - else: - i = op.execute(buf) - - if stop == len(self.tokens.tokens) - 1: - for op in indexToOp.values(): - if op.index >= len(self.tokens.tokens) - 1: buf.write( - op.text) # TODO: this check is probably not needed - - return buf.getvalue() - - def _reduceToSingleOperationPerIndex(self, rewrites): - # Walk replaces - for i, rop in enumerate(rewrites): - if any((rop is None, not isinstance(rop, TokenStreamRewriter.ReplaceOp))): - continue - # Wipe prior inserts within range - inserts = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.InsertBeforeOp] - for iop in inserts: - if iop.index == rop.index: - rewrites[iop.instructionIndex] = None - rop.text = '{}{}'.format(iop.text, rop.text) - elif all((iop.index > rop.index, iop.index <= rop.last_index)): - rewrites[iop.instructionIndex] = None - - # Drop any prior replaces contained within - prevReplaces = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.ReplaceOp] - for prevRop in prevReplaces: - if all((prevRop.index >= rop.index, prevRop.last_index <= rop.last_index)): - rewrites[prevRop.instructionIndex] = None - continue - isDisjoint = any((prevRop.last_index < rop.index, prevRop.index > rop.last_index)) - if all((prevRop.text is None, rop.text is None, not isDisjoint)): - rewrites[prevRop.instructionIndex] = None - rop.index = min(prevRop.index, rop.index) - rop.last_index = min(prevRop.last_index, rop.last_index) - print('New rop {}'.format(rop)) - elif (not(isDisjoint)): - raise ValueError("replace op boundaries of {} overlap with previous {}".format(rop, prevRop)) - - # Walk inserts before - for i, iop in enumerate(rewrites): - if any((iop is None, not isinstance(iop, TokenStreamRewriter.InsertBeforeOp))): - continue - prevInserts = [op for op in rewrites[:i] if isinstance(op, TokenStreamRewriter.InsertBeforeOp)] - for prev_index, prevIop in enumerate(prevInserts): - if prevIop.index == iop.index and type(prevIop) is TokenStreamRewriter.InsertBeforeOp: - iop.text += prevIop.text - rewrites[prev_index] = None - elif prevIop.index == iop.index and type(prevIop) is TokenStreamRewriter.InsertAfterOp: - iop.text = prevIop.text + iop.text - rewrites[prev_index] = None - # look for replaces where iop.index is in range; error - prevReplaces = [op for op in rewrites[:i] if type(op) is TokenStreamRewriter.ReplaceOp] - for rop in prevReplaces: - if iop.index == rop.index: - rop.text = iop.text + rop.text - rewrites[i] = None - continue - if all((iop.index >= rop.index, iop.index <= rop.last_index)): - raise ValueError("insert op {} within boundaries of previous {}".format(iop, rop)) - - reduced = {} - for i, op in enumerate(rewrites): - if op is None: continue - if reduced.get(op.index): raise ValueError('should be only one op per index') - reduced[op.index] = op - - return reduced - - class RewriteOperation(object): - - def __init__(self, tokens, index, text=""): - """ - :type tokens: CommonTokenStream - :param tokens: - :param index: - :param text: - :return: - """ - self.tokens = tokens - self.index = index - self.text = text - self.instructionIndex = 0 - - def execute(self, buf): - """ - :type buf: StringIO.StringIO - :param buf: - :return: - """ - return self.index - - def __str__(self): - return '<{}@{}:"{}">'.format(self.__class__.__name__, self.tokens.get(self.index), self.text) - - class InsertBeforeOp(RewriteOperation): - - def __init__(self, tokens, index, text=""): - super(TokenStreamRewriter.InsertBeforeOp, self).__init__(tokens, index, text) - - def execute(self, buf): - buf.write(self.text) - if self.tokens.get(self.index).type != Token.EOF: - buf.write(self.tokens.get(self.index).text) - return self.index + 1 - - class InsertAfterOp(InsertBeforeOp): - pass - - class ReplaceOp(RewriteOperation): - - def __init__(self, from_idx, to_idx, tokens, text): - super(TokenStreamRewriter.ReplaceOp, self).__init__(tokens, from_idx, text) - self.last_index = to_idx - - def execute(self, buf): - if self.text: - buf.write(self.text) - return self.last_index + 1 - - def __str__(self): - if self.text: - return ''.format(self.tokens.get(self.index), self.tokens.get(self.last_index), - self.text) diff --git a/runtime/Python2/src/antlr4/Utils.py b/runtime/Python2/src/antlr4/Utils.py deleted file mode 100644 index 4255285c9d..0000000000 --- a/runtime/Python2/src/antlr4/Utils.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -from io import StringIO - -def str_collection(val, begin, end): - with StringIO() as buf: - buf.write(begin) - first = True - for item in val: - if not first: - buf.write(u', ') - buf.write(unicode(item)) - first = False - buf.write(end) - return buf.getvalue() - -def str_list(val): - return str_collection(val, u'[', u']') - -def str_set(val): - return str_collection(val, u'{', u'}') - -def escapeWhitespace(s, escapeSpaces): - with StringIO() as buf: - for c in s: - if c==' ' and escapeSpaces: - buf.write(u'\u00B7') - elif c=='\t': - buf.write(u"\\t") - elif c=='\n': - buf.write(u"\\n") - elif c=='\r': - buf.write(u"\\r") - else: - buf.write(unicode(c)) - return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/__init__.py b/runtime/Python2/src/antlr4/__init__.py deleted file mode 100644 index 37ce0bb5db..0000000000 --- a/runtime/Python2/src/antlr4/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from antlr4.Token import Token -from antlr4.InputStream import InputStream -from antlr4.FileStream import FileStream -from antlr4.StdinStream import StdinStream -from antlr4.BufferedTokenStream import TokenStream -from antlr4.CommonTokenStream import CommonTokenStream -from antlr4.Lexer import Lexer -from antlr4.Parser import Parser -from antlr4.dfa.DFA import DFA -from antlr4.atn.ATN import ATN -from antlr4.atn.ATNDeserializer import ATNDeserializer -from antlr4.atn.LexerATNSimulator import LexerATNSimulator -from antlr4.atn.ParserATNSimulator import ParserATNSimulator -from antlr4.atn.PredictionMode import PredictionMode -from antlr4.PredictionContext import PredictionContextCache -from antlr4.ParserRuleContext import ParserRuleContext -from antlr4.tree.Tree import ParseTreeListener, ParseTreeVisitor, ParseTreeWalker, TerminalNode, ErrorNode, RuleNode -from antlr4.error.Errors import RecognitionException, IllegalStateException, NoViableAltException -from antlr4.error.ErrorStrategy import BailErrorStrategy -from antlr4.error.DiagnosticErrorListener import DiagnosticErrorListener -from antlr4.Utils import str_list \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/atn/ATN.py b/runtime/Python2/src/antlr4/atn/ATN.py deleted file mode 100644 index e4d53920b4..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATN.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.IntervalSet import IntervalSet -from antlr4.Token import Token - - -class ATN(object): - - INVALID_ALT_NUMBER = 0 - - # Used for runtime deserialization of ATNs from strings#/ - def __init__(self, grammarType , maxTokenType ): - # The type of the ATN. - self.grammarType = grammarType - # The maximum value for any symbol recognized by a transition in the ATN. - self.maxTokenType = maxTokenType - self.states = [] - # Each subrule/rule is a decision point and we must track them so we - # can go back later and build DFA predictors for them. This includes - # all the rules, subrules, optional blocks, ()+, ()* etc... - self.decisionToState = [] - # Maps from rule index to starting state number. - self.ruleToStartState = [] - # Maps from rule index to stop state number. - self.ruleToStopState = None - self.modeNameToStartState = dict() - # For lexer ATNs, this maps the rule index to the resulting token type. - # For parser ATNs, this maps the rule index to the generated bypass token - # type if the - # {@link ATNDeserializationOptions#isGenerateRuleBypassTransitions} - # deserialization option was specified; otherwise, this is {@code null}. - self.ruleToTokenType = None - # For lexer ATNs, this is an array of {@link LexerAction} objects which may - # be referenced by action transitions in the ATN. - self.lexerActions = None - self.modeToStartState = [] - - # Compute the set of valid tokens that can occur starting in state {@code s}. - # If {@code ctx} is null, the set of tokens will not include what can follow - # the rule surrounding {@code s}. In other words, the set will be - # restricted to tokens reachable staying within {@code s}'s rule. - def nextTokensInContext(self, s, ctx): - from antlr4.LL1Analyzer import LL1Analyzer - anal = LL1Analyzer(self) - return anal.LOOK(s, ctx=ctx) - - # Compute the set of valid tokens that can occur starting in {@code s} and - # staying in same rule. {@link Token#EPSILON} is in set if we reach end of - # rule. - def nextTokensNoContext(self, s): - if s.nextTokenWithinRule is not None: - return s.nextTokenWithinRule - s.nextTokenWithinRule = self.nextTokensInContext(s, None) - s.nextTokenWithinRule.readonly = True - return s.nextTokenWithinRule - - def nextTokens(self, s, ctx = None): - if ctx==None: - return self.nextTokensNoContext(s) - else: - return self.nextTokensInContext(s, ctx) - - def addState(self, state): - if state is not None: - state.atn = self - state.stateNumber = len(self.states) - self.states.append(state) - - def removeState(self, state): - self.states[state.stateNumber] = None # just free mem, don't shift states in list - - def defineDecisionState(self, s): - self.decisionToState.append(s) - s.decision = len(self.decisionToState)-1 - return s.decision - - def getDecisionState(self, decision): - if len(self.decisionToState)==0: - return None - else: - return self.decisionToState[decision] - - # Computes the set of input symbols which could follow ATN state number - # {@code stateNumber} in the specified full {@code context}. This method - # considers the complete parser context, but does not evaluate semantic - # predicates (i.e. all predicates encountered during the calculation are - # assumed true). If a path in the ATN exists from the starting state to the - # {@link RuleStopState} of the outermost context without matching any - # symbols, {@link Token#EOF} is added to the returned set. - # - #

      If {@code context} is {@code null}, it is treated as - # {@link ParserRuleContext#EMPTY}.

      - # - # @param stateNumber the ATN state number - # @param context the full parse context - # @return The set of potentially valid input symbols which could follow the - # specified state in the specified context. - # @throws IllegalArgumentException if the ATN does not contain a state with - # number {@code stateNumber} - #/ - def getExpectedTokens(self, stateNumber, ctx ): - if stateNumber < 0 or stateNumber >= len(self.states): - raise Exception("Invalid state number.") - s = self.states[stateNumber] - following = self.nextTokens(s) - if Token.EPSILON not in following: - return following - expected = IntervalSet() - expected.addSet(following) - expected.removeOne(Token.EPSILON) - while (ctx != None and ctx.invokingState >= 0 and Token.EPSILON in following): - invokingState = self.states[ctx.invokingState] - rt = invokingState.transitions[0] - following = self.nextTokens(rt.followState) - expected.addSet(following) - expected.removeOne(Token.EPSILON) - ctx = ctx.parentCtx - if Token.EPSILON in following: - expected.addOne(Token.EOF) - return expected \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/atn/ATNConfig.py b/runtime/Python2/src/antlr4/atn/ATNConfig.py deleted file mode 100644 index a14e8498b4..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNConfig.py +++ /dev/null @@ -1,149 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# A tuple: (ATN state, predicted alt, syntactic, semantic context). -# The syntactic context is a graph-structured stack node whose -# path(s) to the root is the rule invocation(s) -# chain used to arrive at the state. The semantic context is -# the tree of semantic predicates encountered before reaching -# an ATN state. -#/ -from io import StringIO -from antlr4.atn.ATNState import ATNState, DecisionState -from antlr4.atn.SemanticContext import SemanticContext - -class ATNConfig(object): - - def __init__(self, state=None, alt=None, context=None, semantic=None, config=None): - if config is not None: - if state is None: - state = config.state - if alt is None: - alt = config.alt - if context is None: - context = config.context - if semantic is None: - semantic = config.semanticContext - if semantic is None: - semantic = SemanticContext.NONE - - # The ATN state associated with this configuration#/ - self.state = state - # What alt (or lexer rule) is predicted by this configuration#/ - self.alt = alt - # The stack of invoking states leading to the rule/states associated - # with this config. We track only those contexts pushed during - # execution of the ATN simulator. - self.context = context - self.semanticContext = semantic - # We cannot execute predicates dependent upon local context unless - # we know for sure we are in the correct context. Because there is - # no way to do this efficiently, we simply cannot evaluate - # dependent predicates unless we are in the rule that initially - # invokes the ATN simulator. - # - # closure() tracks the depth of how far we dip into the - # outer context: depth > 0. Note that it may not be totally - # accurate depth since I don't ever decrement. TODO: make it a boolean then - self.reachesIntoOuterContext = 0 if config is None else config.reachesIntoOuterContext - self.precedenceFilterSuppressed = False if config is None else config.precedenceFilterSuppressed - - # An ATN configuration is equal to another if both have - # the same state, they predict the same alternative, and - # syntactic/semantic contexts are the same. - #/ - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, ATNConfig): - return False - else: - return self.state.stateNumber==other.state.stateNumber \ - and self.alt==other.alt \ - and ((self.context is other.context) or (self.context==other.context)) \ - and self.semanticContext==other.semanticContext \ - and self.precedenceFilterSuppressed==other.precedenceFilterSuppressed - - def __hash__(self): - return hash((self.state.stateNumber, self.alt, self.context, self.semanticContext)) - - def hashCodeForConfigSet(self): - return hash((self.state.stateNumber, self.alt, hash(self.semanticContext))) - - def equalsForConfigSet(self, other): - if self is other: - return True - elif not isinstance(other, ATNConfig): - return False - else: - return self.state.stateNumber==other.state.stateNumber \ - and self.alt==other.alt \ - and self.semanticContext==other.semanticContext - - def __str__(self): - return unicode(self) - - def __unicode__(self): - with StringIO() as buf: - buf.write(u"(") - buf.write(unicode(self.state)) - buf.write(u",") - buf.write(unicode(self.alt)) - if self.context is not None: - buf.write(u",[") - buf.write(unicode(self.context)) - buf.write(u"]") - if self.semanticContext is not None and self.semanticContext is not SemanticContext.NONE: - buf.write(u",") - buf.write(unicode(self.semanticContext)) - if self.reachesIntoOuterContext>0: - buf.write(u",up=") - buf.write(unicode(self.reachesIntoOuterContext)) - buf.write(u')') - return buf.getvalue() - -class LexerATNConfig(ATNConfig): - - def __init__(self, state, alt=None, context=None, semantic=SemanticContext.NONE, lexerActionExecutor=None, config=None): - super(LexerATNConfig, self).__init__(state=state, alt=alt, context=context, semantic=semantic, config=config) - if config is not None: - if lexerActionExecutor is None: - lexerActionExecutor = config.lexerActionExecutor - # This is the backing field for {@link #getLexerActionExecutor}. - self.lexerActionExecutor = lexerActionExecutor - self.passedThroughNonGreedyDecision = False if config is None else self.checkNonGreedyDecision(config, state) - - def __hash__(self): - return hash((self.state.stateNumber, self.alt, self.context, - self.semanticContext, self.passedThroughNonGreedyDecision, - self.lexerActionExecutor)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerATNConfig): - return False - if self.passedThroughNonGreedyDecision != other.passedThroughNonGreedyDecision: - return False - if not(self.lexerActionExecutor==other.lexerActionExecutor): - return False - return super(LexerATNConfig, self).__eq__(other) - - - - def hashCodeForConfigSet(self): - return hash(self) - - - - def equalsForConfigSet(self, other): - return self==other - - - - def checkNonGreedyDecision(self, source, target): - return source.passedThroughNonGreedyDecision \ - or isinstance(target, DecisionState) and target.nonGreedy diff --git a/runtime/Python2/src/antlr4/atn/ATNConfigSet.py b/runtime/Python2/src/antlr4/atn/ATNConfigSet.py deleted file mode 100755 index 618dcb8044..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNConfigSet.py +++ /dev/null @@ -1,210 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -from antlr4.PredictionContext import merge -from antlr4.Utils import str_list -from antlr4.atn.ATN import ATN -from antlr4.atn.SemanticContext import SemanticContext -from antlr4.error.Errors import UnsupportedOperationException, IllegalStateException -# -# Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track -# info about the set, with support for combining similar configurations using a -# graph-structured stack. -# / -from functools import reduce -from io import StringIO - - -class ATNConfigSet(object): - # - # The reason that we need this is because we don't want the hash map to use - # the standard hash code and equals. We need all configurations with the same - # {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively doubles - # the number of objects associated with ATNConfigs. The other solution is to - # use a hash table that lets us specify the equals/hashcode operation. - - def __init__(self, fullCtx=True): - # All configs but hashed by (s, i, _, pi) not including context. Wiped out - # when we go readonly as this set becomes a DFA state. - self.configLookup = dict() - # Indicates that this configuration set is part of a full context - # LL prediction. It will be used to determine how to merge $. With SLL - # it's a wildcard whereas it is not for LL context merge. - self.fullCtx = fullCtx - # Indicates that the set of configurations is read-only. Do not - # allow any code to manipulate the set; DFA states will point at - # the sets and they must not change. This does not protect the other - # fields; in particular, conflictingAlts is set after - # we've made this readonly. - self.readonly = False - # Track the elements as they are added to the set; supports get(i)#/ - self.configs = [] - - # TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation - # TODO: can we track conflicts as they are added to save scanning configs later? - self.uniqueAlt = 0 - self.conflictingAlts = None - - # Used in parser and lexer. In lexer, it indicates we hit a pred - # while computing a closure operation. Don't make a DFA state from this. - self.hasSemanticContext = False - self.dipsIntoOuterContext = False - - self.cachedHashCode = -1 - - def __iter__(self): - return self.configs.__iter__() - - # Adding a new config means merging contexts with existing configs for - # {@code (s, i, pi, _)}, where {@code s} is the - # {@link ATNConfig#state}, {@code i} is the {@link ATNConfig#alt}, and - # {@code pi} is the {@link ATNConfig#semanticContext}. We use - # {@code (s,i,pi)} as key. - # - #

      This method updates {@link #dipsIntoOuterContext} and - # {@link #hasSemanticContext} when necessary.

      - #/ - def add(self, config, mergeCache=None): - if self.readonly: - raise Exception("This set is readonly") - if config.semanticContext is not SemanticContext.NONE: - self.hasSemanticContext = True - if config.reachesIntoOuterContext > 0: - self.dipsIntoOuterContext = True - existing = self.getOrAdd(config) - if existing is config: - self.cachedHashCode = -1 - self.configs.append(config) # track order here - return True - # a previous (s,i,pi,_), merge with it and save result - rootIsWildcard = not self.fullCtx - merged = merge(existing.context, config.context, rootIsWildcard, mergeCache) - # no need to check for existing.context, config.context in cache - # since only way to create new graphs is "call rule" and here. - # We cache at both places. - existing.reachesIntoOuterContext = max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext) - # make sure to preserve the precedence filter suppression during the merge - if config.precedenceFilterSuppressed: - existing.precedenceFilterSuppressed = True - existing.context = merged # replace context; no need to alt mapping - return True - - def getOrAdd(self, config): - h = config.hashCodeForConfigSet() - l = self.configLookup.get(h, None) - if l is not None: - r = next((c for c in l if config.equalsForConfigSet(c)), None) - if r is not None: - return r - if l is None: - l = [config] - self.configLookup[h] = l - else: - l.append(config) - return config - - def getStates(self): - return set(cfg.state for cfg in self.configs) - - def getPredicates(self): - return [cfg.semanticContext for cfg in self.configs if cfg.semanticContext!=SemanticContext.NONE] - - def get(self, i): - return self.configs[i] - - def optimizeConfigs(self, interpreter): - if self.readonly: - raise IllegalStateException("This set is readonly") - if len(self.configs)==0: - return - for config in self.configs: - config.context = interpreter.getCachedContext(config.context) - - def addAll(self, coll): - for c in coll: - self.add(c) - return False - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, ATNConfigSet): - return False - - same = self.configs is not None and \ - self.configs == other.configs and \ - self.fullCtx == other.fullCtx and \ - self.uniqueAlt == other.uniqueAlt and \ - self.conflictingAlts == other.conflictingAlts and \ - self.hasSemanticContext == other.hasSemanticContext and \ - self.dipsIntoOuterContext == other.dipsIntoOuterContext - - return same - - def __hash__(self): - if self.readonly: - if self.cachedHashCode == -1: - self.cachedHashCode = self.hashConfigs() - return self.cachedHashCode - return self.hashConfigs() - - def hashConfigs(self): - return reduce(lambda h, cfg: hash((h, cfg)), self.configs, 0) - - def __len__(self): - return len(self.configs) - - def isEmpty(self): - return len(self.configs)==0 - - def __contains__(self, config): - if self.configLookup is None: - raise UnsupportedOperationException("This method is not implemented for readonly sets.") - h = config.hashCodeForConfigSet() - l = self.configLookup.get(h, None) - if l is not None: - for c in l: - if config.equalsForConfigSet(c): - return True - return False - - def clear(self): - if self.readonly: - raise IllegalStateException("This set is readonly") - self.configs.clear() - self.cachedHashCode = -1 - self.configLookup.clear() - - def setReadonly(self, readonly): - self.readonly = readonly - self.configLookup = None # can't mod, no need for lookup cache - - def __str__(self): - return unicode(self) - - def __unicode__(self): - with StringIO() as buf: - buf.write(str_list(self.configs)) - if self.hasSemanticContext: - buf.write(u",hasSemanticContext=") - buf.write(unicode(self.hasSemanticContext).lower()) - if self.uniqueAlt!=ATN.INVALID_ALT_NUMBER: - buf.write(u",uniqueAlt=") - buf.write(unicode(self.uniqueAlt)) - if self.conflictingAlts is not None: - buf.write(u",conflictingAlts=") - buf.write(u"{"+unicode(', '.join(str(a) for a in self.conflictingAlts))+u"}") - if self.dipsIntoOuterContext: - buf.write(u",dipsIntoOuterContext") - return buf.getvalue() - - -class OrderedATNConfigSet(ATNConfigSet): - - def __init__(self): - super(OrderedATNConfigSet, self).__init__() - - - diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py b/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py deleted file mode 100644 index cecc3fa2b0..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNDeserializationOptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -class ATNDeserializationOptions(object): - - defaultOptions = None - - def __init__(self, copyFrom = None): - self.readonly = False - self.verifyATN = True if copyFrom is None else copyFrom.verifyATN - self.generateRuleBypassTransitions = False if copyFrom is None else copyFrom.generateRuleBypassTransitions - - def __setattr__(self, key, value): - if key!="readonly" and self.readonly: - raise Exception("The object is read only.") - super(type(self), self).__setattr__(key,value) - -ATNDeserializationOptions.defaultOptions = ATNDeserializationOptions() -ATNDeserializationOptions.defaultOptions.readonly = True - diff --git a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py b/runtime/Python2/src/antlr4/atn/ATNDeserializer.py deleted file mode 100644 index f8bc5133ca..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNDeserializer.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.atn.ATN import ATN -from antlr4.atn.ATNType import ATNType -from antlr4.atn.ATNState import * -from antlr4.atn.Transition import * -from antlr4.atn.LexerAction import * -from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions - -SERIALIZED_VERSION = 4 - -class ATNDeserializer (object): - - def __init__(self, options = None): - if options is None: - options = ATNDeserializationOptions.defaultOptions - self.deserializationOptions = options - self.edgeFactories = None - self.stateFactories = None - self.actionFactories = None - - def deserialize(self, data): - self.data = data - self.pos = 0 - self.checkVersion() - atn = self.readATN() - self.readStates(atn) - self.readRules(atn) - self.readModes(atn) - sets = [] - self.readSets(atn, sets) - self.readEdges(atn, sets) - self.readDecisions(atn) - self.readLexerActions(atn) - self.markPrecedenceDecisions(atn) - self.verifyATN(atn) - if self.deserializationOptions.generateRuleBypassTransitions \ - and atn.grammarType == ATNType.PARSER: - self.generateRuleBypassTransitions(atn) - # re-verify after modification - self.verifyATN(atn) - return atn - - def checkVersion(self): - version = self.readInt() - if version != SERIALIZED_VERSION: - raise Exception("Could not deserialize ATN with version " + str(version) + " (expected " + str(SERIALIZED_VERSION) + ").") - - def readATN(self): - grammarType = self.readInt() - maxTokenType = self.readInt() - return ATN(grammarType, maxTokenType) - - def readStates(self, atn): - loopBackStateNumbers = [] - endStateNumbers = [] - nstates = self.readInt() - for i in range(0, nstates): - stype = self.readInt() - # ignore bad type of states - if stype==ATNState.INVALID_TYPE: - atn.addState(None) - continue - ruleIndex = self.readInt() - s = self.stateFactory(stype, ruleIndex) - if stype == ATNState.LOOP_END: # special case - loopBackStateNumber = self.readInt() - loopBackStateNumbers.append((s, loopBackStateNumber)) - elif isinstance(s, BlockStartState): - endStateNumber = self.readInt() - endStateNumbers.append((s, endStateNumber)) - - atn.addState(s) - - # delay the assignment of loop back and end states until we know all the state instances have been initialized - for pair in loopBackStateNumbers: - pair[0].loopBackState = atn.states[pair[1]] - - for pair in endStateNumbers: - pair[0].endState = atn.states[pair[1]] - - numNonGreedyStates = self.readInt() - for i in range(0, numNonGreedyStates): - stateNumber = self.readInt() - atn.states[stateNumber].nonGreedy = True - - numPrecedenceStates = self.readInt() - for i in range(0, numPrecedenceStates): - stateNumber = self.readInt() - atn.states[stateNumber].isPrecedenceRule = True - - def readRules(self, atn): - nrules = self.readInt() - if atn.grammarType == ATNType.LEXER: - atn.ruleToTokenType = [0] * nrules - - atn.ruleToStartState = [0] * nrules - for i in range(0, nrules): - s = self.readInt() - startState = atn.states[s] - atn.ruleToStartState[i] = startState - if atn.grammarType == ATNType.LEXER: - tokenType = self.readInt() - atn.ruleToTokenType[i] = tokenType - - atn.ruleToStopState = [0] * nrules - for state in atn.states: - if not isinstance(state, RuleStopState): - continue - atn.ruleToStopState[state.ruleIndex] = state - atn.ruleToStartState[state.ruleIndex].stopState = state - - def readModes(self, atn): - nmodes = self.readInt() - for i in range(0, nmodes): - s = self.readInt() - atn.modeToStartState.append(atn.states[s]) - - def readSets(self, atn, sets): - m = self.readInt() - for i in range(0, m): - iset = IntervalSet() - sets.append(iset) - n = self.readInt() - containsEof = self.readInt() - if containsEof!=0: - iset.addOne(-1) - for j in range(0, n): - i1 = self.readInt() - i2 = self.readInt() - iset.addRange(Interval(i1, i2 + 1)) # range upper limit is exclusive - - def readEdges(self, atn, sets): - nedges = self.readInt() - for i in range(0, nedges): - src = self.readInt() - trg = self.readInt() - ttype = self.readInt() - arg1 = self.readInt() - arg2 = self.readInt() - arg3 = self.readInt() - trans = self.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets) - srcState = atn.states[src] - srcState.addTransition(trans) - - # edges for rule stop states can be derived, so they aren't serialized - for state in atn.states: - for i in range(0, len(state.transitions)): - t = state.transitions[i] - if not isinstance(t, RuleTransition): - continue - outermostPrecedenceReturn = -1 - if atn.ruleToStartState[t.target.ruleIndex].isPrecedenceRule: - if t.precedence == 0: - outermostPrecedenceReturn = t.target.ruleIndex - trans = EpsilonTransition(t.followState, outermostPrecedenceReturn) - atn.ruleToStopState[t.target.ruleIndex].addTransition(trans) - - for state in atn.states: - if isinstance(state, BlockStartState): - # we need to know the end state to set its start state - if state.endState is None: - raise Exception("IllegalState") - # block end states can only be associated to a single block start state - if state.endState.startState is not None: - raise Exception("IllegalState") - state.endState.startState = state - - elif isinstance(state, PlusLoopbackState): - for i in range(0, len(state.transitions)): - target = state.transitions[i].target - if isinstance(target, PlusBlockStartState): - target.loopBackState = state - elif isinstance(state, StarLoopbackState): - for i in range(0, len(state.transitions)): - target = state.transitions[i].target - if isinstance(target, StarLoopEntryState): - target.loopBackState = state - - def readDecisions(self, atn): - ndecisions = self.readInt() - for i in range(0, ndecisions): - s = self.readInt() - decState = atn.states[s] - atn.decisionToState.append(decState) - decState.decision = i - - def readLexerActions(self, atn): - if atn.grammarType == ATNType.LEXER: - count = self.readInt() - atn.lexerActions = [ None ] * count - for i in range(0, count): - actionType = self.readInt() - data1 = self.readInt() - data2 = self.readInt() - lexerAction = self.lexerActionFactory(actionType, data1, data2) - atn.lexerActions[i] = lexerAction - - def generateRuleBypassTransitions(self, atn): - - count = len(atn.ruleToStartState) - atn.ruleToTokenType = [ 0 ] * count - for i in range(0, count): - atn.ruleToTokenType[i] = atn.maxTokenType + i + 1 - - for i in range(0, count): - self.generateRuleBypassTransition(atn, i) - - def generateRuleBypassTransition(self, atn, idx): - - bypassStart = BasicBlockStartState() - bypassStart.ruleIndex = idx - atn.addState(bypassStart) - - bypassStop = BlockEndState() - bypassStop.ruleIndex = idx - atn.addState(bypassStop) - - bypassStart.endState = bypassStop - atn.defineDecisionState(bypassStart) - - bypassStop.startState = bypassStart - - excludeTransition = None - - if atn.ruleToStartState[idx].isPrecedenceRule: - # wrap from the beginning of the rule to the StarLoopEntryState - endState = None - for state in atn.states: - if self.stateIsEndStateFor(state, idx): - endState = state - excludeTransition = state.loopBackState.transitions[0] - break - - if excludeTransition is None: - raise Exception("Couldn't identify final state of the precedence rule prefix section.") - - else: - - endState = atn.ruleToStopState[idx] - - # all non-excluded transitions that currently target end state need to target blockEnd instead - for state in atn.states: - for transition in state.transitions: - if transition == excludeTransition: - continue - if transition.target == endState: - transition.target = bypassStop - - # all transitions leaving the rule start state need to leave blockStart instead - ruleToStartState = atn.ruleToStartState[idx] - count = len(ruleToStartState.transitions) - while count > 0: - bypassStart.addTransition(ruleToStartState.transitions[count-1]) - del ruleToStartState.transitions[-1] - - # link the new states - atn.ruleToStartState[idx].addTransition(EpsilonTransition(bypassStart)) - bypassStop.addTransition(EpsilonTransition(endState)) - - matchState = BasicState() - atn.addState(matchState) - matchState.addTransition(AtomTransition(bypassStop, atn.ruleToTokenType[idx])) - bypassStart.addTransition(EpsilonTransition(matchState)) - - - def stateIsEndStateFor(self, state, idx): - if state.ruleIndex != idx: - return None - if not isinstance(state, StarLoopEntryState): - return None - - maybeLoopEndState = state.transitions[len(state.transitions) - 1].target - if not isinstance(maybeLoopEndState, LoopEndState): - return None - - if maybeLoopEndState.epsilonOnlyTransitions and \ - isinstance(maybeLoopEndState.transitions[0].target, RuleStopState): - return state - else: - return None - - - # - # Analyze the {@link StarLoopEntryState} states in the specified ATN to set - # the {@link StarLoopEntryState#isPrecedenceDecision} field to the - # correct value. - # - # @param atn The ATN. - # - def markPrecedenceDecisions(self, atn): - for state in atn.states: - if not isinstance(state, StarLoopEntryState): - continue - - # We analyze the ATN to determine if this ATN decision state is the - # decision for the closure block that determines whether a - # precedence rule should continue or complete. - # - if atn.ruleToStartState[state.ruleIndex].isPrecedenceRule: - maybeLoopEndState = state.transitions[len(state.transitions) - 1].target - if isinstance(maybeLoopEndState, LoopEndState): - if maybeLoopEndState.epsilonOnlyTransitions and \ - isinstance(maybeLoopEndState.transitions[0].target, RuleStopState): - state.isPrecedenceDecision = True - - def verifyATN(self, atn): - if not self.deserializationOptions.verifyATN: - return - # verify assumptions - for state in atn.states: - if state is None: - continue - - self.checkCondition(state.epsilonOnlyTransitions or len(state.transitions) <= 1) - - if isinstance(state, PlusBlockStartState): - self.checkCondition(state.loopBackState is not None) - - if isinstance(state, StarLoopEntryState): - self.checkCondition(state.loopBackState is not None) - self.checkCondition(len(state.transitions) == 2) - - if isinstance(state.transitions[0].target, StarBlockStartState): - self.checkCondition(isinstance(state.transitions[1].target, LoopEndState)) - self.checkCondition(not state.nonGreedy) - elif isinstance(state.transitions[0].target, LoopEndState): - self.checkCondition(isinstance(state.transitions[1].target, StarBlockStartState)) - self.checkCondition(state.nonGreedy) - else: - raise Exception("IllegalState") - - if isinstance(state, StarLoopbackState): - self.checkCondition(len(state.transitions) == 1) - self.checkCondition(isinstance(state.transitions[0].target, StarLoopEntryState)) - - if isinstance(state, LoopEndState): - self.checkCondition(state.loopBackState is not None) - - if isinstance(state, RuleStartState): - self.checkCondition(state.stopState is not None) - - if isinstance(state, BlockStartState): - self.checkCondition(state.endState is not None) - - if isinstance(state, BlockEndState): - self.checkCondition(state.startState is not None) - - if isinstance(state, DecisionState): - self.checkCondition(len(state.transitions) <= 1 or state.decision >= 0) - else: - self.checkCondition(len(state.transitions) <= 1 or isinstance(state, RuleStopState)) - - def checkCondition(self, condition, message=None): - if not condition: - if message is None: - message = "IllegalState" - raise Exception(message) - - def readInt(self): - i = self.data[self.pos] - self.pos += 1 - return i - - def readInt32(self): - low = self.readInt() - high = self.readInt() - return low | (high << 16) - - def edgeFactory(self, atn, type, src, trg, arg1, arg2, arg3, sets): - target = atn.states[trg] - if self.edgeFactories is None: - ef = [None] * 11 - ef[0] = lambda args : None - ef[Transition.EPSILON] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - EpsilonTransition(target) - ef[Transition.RANGE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - RangeTransition(target, Token.EOF, arg2) if arg3 != 0 else RangeTransition(target, arg1, arg2) - ef[Transition.RULE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - RuleTransition(atn.states[arg1], arg2, arg3, target) - ef[Transition.PREDICATE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - PredicateTransition(target, arg1, arg2, arg3 != 0) - ef[Transition.PRECEDENCE] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - PrecedencePredicateTransition(target, arg1) - ef[Transition.ATOM] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - AtomTransition(target, Token.EOF) if arg3 != 0 else AtomTransition(target, arg1) - ef[Transition.ACTION] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - ActionTransition(target, arg1, arg2, arg3 != 0) - ef[Transition.SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - SetTransition(target, sets[arg1]) - ef[Transition.NOT_SET] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - NotSetTransition(target, sets[arg1]) - ef[Transition.WILDCARD] = lambda atn, src, trg, arg1, arg2, arg3, sets, target : \ - WildcardTransition(target) - self.edgeFactories = ef - - if type> len(self.edgeFactories) or self.edgeFactories[type] is None: - raise Exception("The specified transition type: " + str(type) + " is not valid.") - else: - return self.edgeFactories[type](atn, src, trg, arg1, arg2, arg3, sets, target) - - def stateFactory(self, type, ruleIndex): - if self.stateFactories is None: - sf = [None] * 13 - sf[ATNState.INVALID_TYPE] = lambda : None - sf[ATNState.BASIC] = lambda : BasicState() - sf[ATNState.RULE_START] = lambda : RuleStartState() - sf[ATNState.BLOCK_START] = lambda : BasicBlockStartState() - sf[ATNState.PLUS_BLOCK_START] = lambda : PlusBlockStartState() - sf[ATNState.STAR_BLOCK_START] = lambda : StarBlockStartState() - sf[ATNState.TOKEN_START] = lambda : TokensStartState() - sf[ATNState.RULE_STOP] = lambda : RuleStopState() - sf[ATNState.BLOCK_END] = lambda : BlockEndState() - sf[ATNState.STAR_LOOP_BACK] = lambda : StarLoopbackState() - sf[ATNState.STAR_LOOP_ENTRY] = lambda : StarLoopEntryState() - sf[ATNState.PLUS_LOOP_BACK] = lambda : PlusLoopbackState() - sf[ATNState.LOOP_END] = lambda : LoopEndState() - self.stateFactories = sf - - if type> len(self.stateFactories) or self.stateFactories[type] is None: - raise Exception("The specified state type " + str(type) + " is not valid.") - else: - s = self.stateFactories[type]() - if s is not None: - s.ruleIndex = ruleIndex - return s - - def lexerActionFactory(self, type, data1, data2): - if self.actionFactories is None: - af = [ None ] * 8 - af[LexerActionType.CHANNEL] = lambda data1, data2: LexerChannelAction(data1) - af[LexerActionType.CUSTOM] = lambda data1, data2: LexerCustomAction(data1, data2) - af[LexerActionType.MODE] = lambda data1, data2: LexerModeAction(data1) - af[LexerActionType.MORE] = lambda data1, data2: LexerMoreAction.INSTANCE - af[LexerActionType.POP_MODE] = lambda data1, data2: LexerPopModeAction.INSTANCE - af[LexerActionType.PUSH_MODE] = lambda data1, data2: LexerPushModeAction(data1) - af[LexerActionType.SKIP] = lambda data1, data2: LexerSkipAction.INSTANCE - af[LexerActionType.TYPE] = lambda data1, data2: LexerTypeAction(data1) - self.actionFactories = af - - if type> len(self.actionFactories) or self.actionFactories[type] is None: - raise Exception("The specified lexer action type " + str(type) + " is not valid.") - else: - return self.actionFactories[type](data1, data2) diff --git a/runtime/Python2/src/antlr4/atn/ATNSimulator.py b/runtime/Python2/src/antlr4/atn/ATNSimulator.py deleted file mode 100644 index 5e7e1f2313..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNSimulator.py +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.PredictionContext import getCachedPredictionContext -from antlr4.atn.ATNConfigSet import ATNConfigSet -from antlr4.dfa.DFAState import DFAState - - -class ATNSimulator(object): - - # Must distinguish between missing edge and edge we know leads nowhere#/ - ERROR = DFAState(0x7FFFFFFF, ATNConfigSet()) - - # The context cache maps all PredictionContext objects that are == - # to a single cached copy. This cache is shared across all contexts - # in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet - # to use only cached nodes/graphs in addDFAState(). We don't want to - # fill this during closure() since there are lots of contexts that - # pop up but are not used ever again. It also greatly slows down closure(). - # - #

      This cache makes a huge difference in memory and a little bit in speed. - # For the Java grammar on java.*, it dropped the memory requirements - # at the end from 25M to 16M. We don't store any of the full context - # graphs in the DFA because they are limited to local context only, - # but apparently there's a lot of repetition there as well. We optimize - # the config contexts before storing the config set in the DFA states - # by literally rebuilding them with cached subgraphs only.

      - # - #

      I tried a cache for use during closure operations, that was - # whacked after each adaptivePredict(). It cost a little bit - # more time I think and doesn't save on the overall footprint - # so it's not worth the complexity.

      - #/ - def __init__(self, atn, sharedContextCache): - self.atn = atn - self.sharedContextCache = sharedContextCache - - def getCachedContext(self, context): - if self.sharedContextCache is None: - return context - visited = dict() - return getCachedPredictionContext(context, self.sharedContextCache, visited) - diff --git a/runtime/Python2/src/antlr4/atn/ATNState.py b/runtime/Python2/src/antlr4/atn/ATNState.py deleted file mode 100644 index 7d1f94fdf0..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNState.py +++ /dev/null @@ -1,255 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# The following images show the relation of states and -# {@link ATNState#transitions} for various grammar constructs. -# -#
        -# -#
      • Solid edges marked with an ε indicate a required -# {@link EpsilonTransition}.
      • -# -#
      • Dashed edges indicate locations where any transition derived from -# {@link Transition} might appear.
      • -# -#
      • Dashed nodes are place holders for either a sequence of linked -# {@link BasicState} states or the inclusion of a block representing a nested -# construct in one of the forms below.
      • -# -#
      • Nodes showing multiple outgoing alternatives with a {@code ...} support -# any number of alternatives (one or more). Nodes without the {@code ...} only -# support the exact number of alternatives shown in the diagram.
      • -# -#
      -# -#

      Basic Blocks

      -# -#

      Rule

      -# -# -# -#

      Block of 1 or more alternatives

      -# -# -# -#

      Greedy Loops

      -# -#

      Greedy Closure: {@code (...)*}

      -# -# -# -#

      Greedy Positive Closure: {@code (...)+}

      -# -# -# -#

      Greedy Optional: {@code (...)?}

      -# -# -# -#

      Non-Greedy Loops

      -# -#

      Non-Greedy Closure: {@code (...)*?}

      -# -# -# -#

      Non-Greedy Positive Closure: {@code (...)+?}

      -# -# -# -#

      Non-Greedy Optional: {@code (...)??}

      -# -# -# - -INITIAL_NUM_TRANSITIONS = 4 - -class ATNState(object): - - # constants for serialization - INVALID_TYPE = 0 - BASIC = 1 - RULE_START = 2 - BLOCK_START = 3 - PLUS_BLOCK_START = 4 - STAR_BLOCK_START = 5 - TOKEN_START = 6 - RULE_STOP = 7 - BLOCK_END = 8 - STAR_LOOP_BACK = 9 - STAR_LOOP_ENTRY = 10 - PLUS_LOOP_BACK = 11 - LOOP_END = 12 - - serializationNames = [ - "INVALID", - "BASIC", - "RULE_START", - "BLOCK_START", - "PLUS_BLOCK_START", - "STAR_BLOCK_START", - "TOKEN_START", - "RULE_STOP", - "BLOCK_END", - "STAR_LOOP_BACK", - "STAR_LOOP_ENTRY", - "PLUS_LOOP_BACK", - "LOOP_END" ] - - INVALID_STATE_NUMBER = -1 - - def __init__(self): - # Which ATN are we in? - self.atn = None - self.stateNumber = ATNState.INVALID_STATE_NUMBER - self.stateType = None - self.ruleIndex = 0 # at runtime, we don't have Rule objects - self.epsilonOnlyTransitions = False - # Track the transitions emanating from this ATN state. - self.transitions = [] - # Used to cache lookahead during parsing, not used during construction - self.nextTokenWithinRule = None - - def __hash__(self): - return self.stateNumber - - def __eq__(self, other): - return isinstance(other, ATNState) and self.stateNumber==other.stateNumber - - def onlyHasEpsilonTransitions(self): - return self.epsilonOnlyTransitions - - def isNonGreedyExitState(self): - return False - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return unicode(self.stateNumber) - - def addTransition(self, trans, index=-1): - if len(self.transitions)==0: - self.epsilonOnlyTransitions = trans.isEpsilon - elif self.epsilonOnlyTransitions != trans.isEpsilon: - self.epsilonOnlyTransitions = False - # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber); - if index==-1: - self.transitions.append(trans) - else: - self.transitions.insert(index, trans) - -class BasicState(ATNState): - - def __init__(self): - super(BasicState, self).__init__() - self.stateType = self.BASIC - - -class DecisionState(ATNState): - - def __init__(self): - super(DecisionState, self).__init__() - self.decision = -1 - self.nonGreedy = False - -# The start of a regular {@code (...)} block. -class BlockStartState(DecisionState): - - def __init__(self): - super(BlockStartState, self).__init__() - self.endState = None - -class BasicBlockStartState(BlockStartState): - - def __init__(self): - super(BasicBlockStartState, self).__init__() - self.stateType = self.BLOCK_START - -# Terminal node of a simple {@code (a|b|c)} block. -class BlockEndState(ATNState): - - def __init__(self): - super(BlockEndState, self).__init__() - self.stateType = self.BLOCK_END - self.startState = None - -# The last node in the ATN for a rule, unless that rule is the start symbol. -# In that case, there is one transition to EOF. Later, we might encode -# references to all calls to this rule to compute FOLLOW sets for -# error handling. -# -class RuleStopState(ATNState): - - def __init__(self): - super(RuleStopState, self).__init__() - self.stateType = self.RULE_STOP - -class RuleStartState(ATNState): - - def __init__(self): - super(RuleStartState, self).__init__() - self.stateType = self.RULE_START - self.stopState = None - self.isPrecedenceRule = False - -# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: -# one to the loop back to start of the block and one to exit. -# -class PlusLoopbackState(DecisionState): - - def __init__(self): - super(PlusLoopbackState, self).__init__() - self.stateType = self.PLUS_LOOP_BACK - -# Start of {@code (A|B|...)+} loop. Technically a decision state, but -# we don't use for code generation; somebody might need it, so I'm defining -# it for completeness. In reality, the {@link PlusLoopbackState} node is the -# real decision-making note for {@code A+}. -# -class PlusBlockStartState(BlockStartState): - - def __init__(self): - super(PlusBlockStartState, self).__init__() - self.stateType = self.PLUS_BLOCK_START - self.loopBackState = None - -# The block that begins a closure loop. -class StarBlockStartState(BlockStartState): - - def __init__(self): - super(StarBlockStartState, self).__init__() - self.stateType = self.STAR_BLOCK_START - -class StarLoopbackState(ATNState): - - def __init__(self): - super(StarLoopbackState, self).__init__() - self.stateType = self.STAR_LOOP_BACK - - -class StarLoopEntryState(DecisionState): - - def __init__(self): - super(StarLoopEntryState, self).__init__() - self.stateType = self.STAR_LOOP_ENTRY - self.loopBackState = None - # Indicates whether this state can benefit from a precedence DFA during SLL decision making. - self.isPrecedenceDecision = None - -# Mark the end of a * or + loop. -class LoopEndState(ATNState): - - def __init__(self): - super(LoopEndState, self).__init__() - self.stateType = self.LOOP_END - self.loopBackState = None - -# The Tokens rule start state linking to each lexer rule start state */ -class TokensStartState(DecisionState): - - def __init__(self): - super(TokensStartState, self).__init__() - self.stateType = self.TOKEN_START diff --git a/runtime/Python2/src/antlr4/atn/ATNType.py b/runtime/Python2/src/antlr4/atn/ATNType.py deleted file mode 100644 index 4f168d5d59..0000000000 --- a/runtime/Python2/src/antlr4/atn/ATNType.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# Represents the type of recognizer an ATN applies to. - -class ATNType(object): - - LEXER = 0 - PARSER = 1 - diff --git a/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py b/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py deleted file mode 100644 index a782abf807..0000000000 --- a/runtime/Python2/src/antlr4/atn/LexerATNSimulator.py +++ /dev/null @@ -1,554 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# When we hit an accept state in either the DFA or the ATN, we -# have to notify the character stream to start buffering characters -# via {@link IntStream#mark} and record the current state. The current sim state -# includes the current index into the input, the current line, -# and current character position in that line. Note that the Lexer is -# tracking the starting line and characterization of the token. These -# variables track the "state" of the simulator when it hits an accept state. -# -#

      We track these variables separately for the DFA and ATN simulation -# because the DFA simulation often has to fail over to the ATN -# simulation. If the ATN simulation fails, we need the DFA to fall -# back to its previously accepted state, if any. If the ATN succeeds, -# then the ATN does the accept and the DFA simulator that invoked it -# can simply return the predicted token type.

      -#/ -from antlr4 import Lexer -from antlr4.PredictionContext import SingletonPredictionContext, PredictionContext -from antlr4.Token import Token -from antlr4.atn.ATN import ATN -from antlr4.atn.ATNConfig import LexerATNConfig -from antlr4.atn.ATNSimulator import ATNSimulator -from antlr4.atn.ATNConfigSet import ATNConfigSet, OrderedATNConfigSet -from antlr4.atn.ATNState import RuleStopState, ATNState -from antlr4.atn.LexerActionExecutor import LexerActionExecutor -from antlr4.atn.Transition import Transition -from antlr4.dfa.DFAState import DFAState -from antlr4.error.Errors import LexerNoViableAltException, UnsupportedOperationException - -class SimState(object): - - def __init__(self): - self.reset() - - def reset(self): - self.index = -1 - self.line = 0 - self.column = -1 - self.dfaState = None - -class LexerATNSimulator(ATNSimulator): - - debug = False - dfa_debug = False - - MIN_DFA_EDGE = 0 - MAX_DFA_EDGE = 127 # forces unicode to stay in ATN - - ERROR = None - - def __init__(self, recog, atn, decisionToDFA, sharedContextCache): - super(LexerATNSimulator, self).__init__(atn, sharedContextCache) - self.decisionToDFA = decisionToDFA - self.recog = recog - # The current token's starting index into the character stream. - # Shared across DFA to ATN simulation in case the ATN fails and the - # DFA did not have a previous accept state. In this case, we use the - # ATN-generated exception object. - self.startIndex = -1 - # line number 1..n within the input#/ - self.line = 1 - # The index of the character relative to the beginning of the line 0..n-1#/ - self.column = 0 - from antlr4.Lexer import Lexer - self.mode = Lexer.DEFAULT_MODE - # Used during DFA/ATN exec to record the most recent accept configuration info - self.prevAccept = SimState() - - - def copyState(self, simulator ): - self.column = simulator.column - self.line = simulator.line - self.mode = simulator.mode - self.startIndex = simulator.startIndex - - def match(self, input , mode): - self.mode = mode - mark = input.mark() - try: - self.startIndex = input.index - self.prevAccept.reset() - dfa = self.decisionToDFA[mode] - if dfa.s0 is None: - return self.matchATN(input) - else: - return self.execATN(input, dfa.s0) - finally: - input.release(mark) - - def reset(self): - self.prevAccept.reset() - self.startIndex = -1 - self.line = 1 - self.column = 0 - self.mode = Lexer.DEFAULT_MODE - - def matchATN(self, input): - startState = self.atn.modeToStartState[self.mode] - - if LexerATNSimulator.debug: - print("matchATN mode " + str(self.mode) + " start: " + str(startState)) - - old_mode = self.mode - s0_closure = self.computeStartState(input, startState) - suppressEdge = s0_closure.hasSemanticContext - s0_closure.hasSemanticContext = False - - next = self.addDFAState(s0_closure) - if not suppressEdge: - self.decisionToDFA[self.mode].s0 = next - - predict = self.execATN(input, next) - - if LexerATNSimulator.debug: - print("DFA after matchATN: " + str(self.decisionToDFA[old_mode].toLexerString())) - - return predict - - def execATN(self, input, ds0): - if LexerATNSimulator.debug: - print("start state closure=" + str(ds0.configs)) - - if ds0.isAcceptState: - # allow zero-length tokens - self.captureSimState(self.prevAccept, input, ds0) - - t = input.LA(1) - s = ds0 # s is current/from DFA state - - while True: # while more work - if LexerATNSimulator.debug: - print("execATN loop starting closure:", str(s.configs)) - - # As we move src->trg, src->trg, we keep track of the previous trg to - # avoid looking up the DFA state again, which is expensive. - # If the previous target was already part of the DFA, we might - # be able to avoid doing a reach operation upon t. If s!=null, - # it means that semantic predicates didn't prevent us from - # creating a DFA state. Once we know s!=null, we check to see if - # the DFA state has an edge already for t. If so, we can just reuse - # it's configuration set; there's no point in re-computing it. - # This is kind of like doing DFA simulation within the ATN - # simulation because DFA simulation is really just a way to avoid - # computing reach/closure sets. Technically, once we know that - # we have a previously added DFA state, we could jump over to - # the DFA simulator. But, that would mean popping back and forth - # a lot and making things more complicated algorithmically. - # This optimization makes a lot of sense for loops within DFA. - # A character will take us back to an existing DFA state - # that already has lots of edges out of it. e.g., .* in comments. - # print("Target for:" + str(s) + " and:" + str(t)) - target = self.getExistingTargetState(s, t) - # print("Existing:" + str(target)) - if target is None: - target = self.computeTargetState(input, s, t) - # print("Computed:" + str(target)) - - if target == self.ERROR: - break - - # If this is a consumable input element, make sure to consume before - # capturing the accept state so the input index, line, and char - # position accurately reflect the state of the interpreter at the - # end of the token. - if t != Token.EOF: - self.consume(input) - - if target.isAcceptState: - self.captureSimState(self.prevAccept, input, target) - if t == Token.EOF: - break - - t = input.LA(1) - - s = target # flip; current DFA target becomes new src/from state - - return self.failOrAccept(self.prevAccept, input, s.configs, t) - - # Get an existing target state for an edge in the DFA. If the target state - # for the edge has not yet been computed or is otherwise not available, - # this method returns {@code null}. - # - # @param s The current DFA state - # @param t The next input symbol - # @return The existing target DFA state for the given input symbol - # {@code t}, or {@code null} if the target state for this edge is not - # already cached - def getExistingTargetState(self, s, t): - if s.edges is None or t < self.MIN_DFA_EDGE or t > self.MAX_DFA_EDGE: - return None - - target = s.edges[t - self.MIN_DFA_EDGE] - if LexerATNSimulator.debug and target is not None: - print("reuse state", str(s.stateNumber), "edge to", str(target.stateNumber)) - - return target - - # Compute a target state for an edge in the DFA, and attempt to add the - # computed state and corresponding edge to the DFA. - # - # @param input The input stream - # @param s The current DFA state - # @param t The next input symbol - # - # @return The computed target DFA state for the given input symbol - # {@code t}. If {@code t} does not lead to a valid DFA state, this method - # returns {@link #ERROR}. - def computeTargetState(self, input, s, t): - reach = OrderedATNConfigSet() - - # if we don't find an existing DFA state - # Fill reach starting from closure, following t transitions - self.getReachableConfigSet(input, s.configs, reach, t) - - if len(reach)==0: # we got nowhere on t from s - if not reach.hasSemanticContext: - # we got nowhere on t, don't throw out this knowledge; it'd - # cause a failover from DFA later. - self. addDFAEdge(s, t, self.ERROR) - - # stop when we can't match any more char - return self.ERROR - - # Add an edge from s to target DFA found/created for reach - return self.addDFAEdge(s, t, cfgs=reach) - - def failOrAccept(self, prevAccept , input, reach, t): - if self.prevAccept.dfaState is not None: - lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor - self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column) - return prevAccept.dfaState.prediction - else: - # if no accept and EOF is first char, return EOF - if t==Token.EOF and input.index==self.startIndex: - return Token.EOF - raise LexerNoViableAltException(self.recog, input, self.startIndex, reach) - - # Given a starting configuration set, figure out all ATN configurations - # we can reach upon input {@code t}. Parameter {@code reach} is a return - # parameter. - def getReachableConfigSet(self, input, closure, reach, t): - # this is used to skip processing for configs which have a lower priority - # than a config that already reached an accept state for the same rule - skipAlt = ATN.INVALID_ALT_NUMBER - for cfg in closure: - currentAltReachedAcceptState = ( cfg.alt == skipAlt ) - if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision: - continue - - if LexerATNSimulator.debug: - print("testing", self.getTokenName(t), "at", str(cfg)) - - for trans in cfg.state.transitions: # for each transition - target = self.getReachableTarget(trans, t) - if target is not None: - lexerActionExecutor = cfg.lexerActionExecutor - if lexerActionExecutor is not None: - lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex) - - treatEofAsEpsilon = (t == Token.EOF) - config = LexerATNConfig(state=target, lexerActionExecutor=lexerActionExecutor, config=cfg) - if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon): - # any remaining configs for this alt have a lower priority than - # the one that just reached an accept state. - skipAlt = cfg.alt - - def accept(self, input, lexerActionExecutor, startIndex, index, line, charPos): - if LexerATNSimulator.debug: - print("ACTION", lexerActionExecutor) - - # seek to after last char in token - input.seek(index) - self.line = line - self.column = charPos - - if lexerActionExecutor is not None and self.recog is not None: - lexerActionExecutor.execute(self.recog, input, startIndex) - - def getReachableTarget(self, trans, t): - if trans.matches(t, 0, Lexer.MAX_CHAR_VALUE): - return trans.target - else: - return None - - def computeStartState(self, input, p): - initialContext = PredictionContext.EMPTY - configs = OrderedATNConfigSet() - for i in range(0,len(p.transitions)): - target = p.transitions[i].target - c = LexerATNConfig(state=target, alt=i+1, context=initialContext) - self.closure(input, c, configs, False, False, False) - return configs - - # Since the alternatives within any lexer decision are ordered by - # preference, this method stops pursuing the closure as soon as an accept - # state is reached. After the first accept state is reached by depth-first - # search from {@code config}, all other (potentially reachable) states for - # this rule would have a lower priority. - # - # @return {@code true} if an accept state is reached, otherwise - # {@code false}. - def closure(self, input, config, configs, currentAltReachedAcceptState, - speculative, treatEofAsEpsilon): - if LexerATNSimulator.debug: - print("closure(" + str(config) + ")") - - if isinstance( config.state, RuleStopState ): - if LexerATNSimulator.debug: - if self.recog is not None: - print("closure at", self.recog.symbolicNames[config.state.ruleIndex], "rule stop", str(config)) - else: - print("closure at rule stop", str(config)) - - if config.context is None or config.context.hasEmptyPath(): - if config.context is None or config.context.isEmpty(): - configs.add(config) - return True - else: - configs.add(LexerATNConfig(state=config.state, config=config, context=PredictionContext.EMPTY)) - currentAltReachedAcceptState = True - - if config.context is not None and not config.context.isEmpty(): - for i in range(0,len(config.context)): - if config.context.getReturnState(i) != PredictionContext.EMPTY_RETURN_STATE: - newContext = config.context.getParent(i) # "pop" return state - returnState = self.atn.states[config.context.getReturnState(i)] - c = LexerATNConfig(state=returnState, config=config, context=newContext) - currentAltReachedAcceptState = self.closure(input, c, configs, - currentAltReachedAcceptState, speculative, treatEofAsEpsilon) - - return currentAltReachedAcceptState - - # optimization - if not config.state.epsilonOnlyTransitions: - if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision: - configs.add(config) - - for t in config.state.transitions: - c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon) - if c is not None: - currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon) - - return currentAltReachedAcceptState - - # side-effect: can alter configs.hasSemanticContext - def getEpsilonTarget(self, input, config, t, configs, speculative, treatEofAsEpsilon): - c = None - if t.serializationType==Transition.RULE: - newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber) - c = LexerATNConfig(state=t.target, config=config, context=newContext) - - elif t.serializationType==Transition.PRECEDENCE: - raise UnsupportedOperationException("Precedence predicates are not supported in lexers.") - - elif t.serializationType==Transition.PREDICATE: - # Track traversing semantic predicates. If we traverse, - # we cannot add a DFA state for this "reach" computation - # because the DFA would not test the predicate again in the - # future. Rather than creating collections of semantic predicates - # like v3 and testing them on prediction, v4 will test them on the - # fly all the time using the ATN not the DFA. This is slower but - # semantically it's not used that often. One of the key elements to - # this predicate mechanism is not adding DFA states that see - # predicates immediately afterwards in the ATN. For example, - - # a : ID {p1}? | ID {p2}? ; - - # should create the start state for rule 'a' (to save start state - # competition), but should not create target of ID state. The - # collection of ATN states the following ID references includes - # states reached by traversing predicates. Since this is when we - # test them, we cannot cash the DFA state target of ID. - - if LexerATNSimulator.debug: - print("EVAL rule "+ str(t.ruleIndex) + ":" + str(t.predIndex)) - configs.hasSemanticContext = True - if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative): - c = LexerATNConfig(state=t.target, config=config) - - elif t.serializationType==Transition.ACTION: - if config.context is None or config.context.hasEmptyPath(): - # execute actions anywhere in the start rule for a token. - # - # TODO: if the entry rule is invoked recursively, some - # actions may be executed during the recursive call. The - # problem can appear when hasEmptyPath() is true but - # isEmpty() is false. In this case, the config needs to be - # split into two contexts - one with just the empty path - # and another with everything but the empty path. - # Unfortunately, the current algorithm does not allow - # getEpsilonTarget to return two configurations, so - # additional modifications are needed before we can support - # the split operation. - lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor, - self.atn.lexerActions[t.actionIndex]) - c = LexerATNConfig(state=t.target, config=config, lexerActionExecutor=lexerActionExecutor) - - else: - # ignore actions in referenced rules - c = LexerATNConfig(state=t.target, config=config) - - elif t.serializationType==Transition.EPSILON: - c = LexerATNConfig(state=t.target, config=config) - - elif t.serializationType in [ Transition.ATOM, Transition.RANGE, Transition.SET ]: - if treatEofAsEpsilon: - if t.matches(Token.EOF, 0, Lexer.MAX_CHAR_VALUE): - c = LexerATNConfig(state=t.target, config=config) - - return c - - # Evaluate a predicate specified in the lexer. - # - #

      If {@code speculative} is {@code true}, this method was called before - # {@link #consume} for the matched character. This method should call - # {@link #consume} before evaluating the predicate to ensure position - # sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine}, - # and {@link Lexer#getcolumn}, properly reflect the current - # lexer state. This method should restore {@code input} and the simulator - # to the original state before returning (i.e. undo the actions made by the - # call to {@link #consume}.

      - # - # @param input The input stream. - # @param ruleIndex The rule containing the predicate. - # @param predIndex The index of the predicate within the rule. - # @param speculative {@code true} if the current index in {@code input} is - # one character before the predicate's location. - # - # @return {@code true} if the specified predicate evaluates to - # {@code true}. - #/ - def evaluatePredicate(self, input, ruleIndex, predIndex, speculative): - # assume true if no recognizer was provided - if self.recog is None: - return True - - if not speculative: - return self.recog.sempred(None, ruleIndex, predIndex) - - savedcolumn = self.column - savedLine = self.line - index = input.index - marker = input.mark() - try: - self.consume(input) - return self.recog.sempred(None, ruleIndex, predIndex) - finally: - self.column = savedcolumn - self.line = savedLine - input.seek(index) - input.release(marker) - - def captureSimState(self, settings, input, dfaState): - settings.index = input.index - settings.line = self.line - settings.column = self.column - settings.dfaState = dfaState - - def addDFAEdge(self, from_, tk, to=None, cfgs=None): - - if to is None and cfgs is not None: - # leading to this call, ATNConfigSet.hasSemanticContext is used as a - # marker indicating dynamic predicate evaluation makes this edge - # dependent on the specific input sequence, so the static edge in the - # DFA should be omitted. The target DFAState is still created since - # execATN has the ability to resynchronize with the DFA state cache - # following the predicate evaluation step. - # - # TJP notes: next time through the DFA, we see a pred again and eval. - # If that gets us to a previously created (but dangling) DFA - # state, we can continue in pure DFA mode from there. - #/ - suppressEdge = cfgs.hasSemanticContext - cfgs.hasSemanticContext = False - - to = self.addDFAState(cfgs) - - if suppressEdge: - return to - - # add the edge - if tk < self.MIN_DFA_EDGE or tk > self.MAX_DFA_EDGE: - # Only track edges within the DFA bounds - return to - - if LexerATNSimulator.debug: - print("EDGE " + str(from_) + " -> " + str(to) + " upon "+ chr(tk)) - - if from_.edges is None: - # make room for tokens 1..n and -1 masquerading as index 0 - from_.edges = [ None ] * (self.MAX_DFA_EDGE - self.MIN_DFA_EDGE + 1) - - from_.edges[tk - self.MIN_DFA_EDGE] = to # connect - - return to - - - # Add a new DFA state if there isn't one with this set of - # configurations already. This method also detects the first - # configuration containing an ATN rule stop state. Later, when - # traversing the DFA, we will know which rule to accept. - def addDFAState(self, configs): - - proposed = DFAState(configs=configs) - firstConfigWithRuleStopState = next((cfg for cfg in configs if isinstance(cfg.state, RuleStopState)), None) - - if firstConfigWithRuleStopState is not None: - proposed.isAcceptState = True - proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor - proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex] - - dfa = self.decisionToDFA[self.mode] - existing = dfa.states.get(proposed, None) - if existing is not None: - return existing - - newState = proposed - - newState.stateNumber = len(dfa.states) - configs.setReadonly(True) - newState.configs = configs - dfa.states[newState] = newState - return newState - - def getDFA(self, mode): - return self.decisionToDFA[mode] - - # Get the text matched so far for the current token. - def getText(self, input): - # index is first lookahead char, don't include. - return input.getText(self.startIndex, input.index-1) - - def consume(self, input): - curChar = input.LA(1) - if curChar==ord('\n'): - self.line += 1 - self.column = 0 - else: - self.column += 1 - input.consume() - - def getTokenName(self, t): - if t==-1: - return "EOF" - else: - return "'" + chr(t) + "'" - - -LexerATNSimulator.ERROR = DFAState(0x7FFFFFFF, ATNConfigSet()) diff --git a/runtime/Python2/src/antlr4/atn/LexerAction.py b/runtime/Python2/src/antlr4/atn/LexerAction.py deleted file mode 100644 index 19c0702141..0000000000 --- a/runtime/Python2/src/antlr4/atn/LexerAction.py +++ /dev/null @@ -1,291 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -class LexerActionType(object): - - CHANNEL = 0 #The type of a {@link LexerChannelAction} action. - CUSTOM = 1 #The type of a {@link LexerCustomAction} action. - MODE = 2 #The type of a {@link LexerModeAction} action. - MORE = 3 #The type of a {@link LexerMoreAction} action. - POP_MODE = 4 #The type of a {@link LexerPopModeAction} action. - PUSH_MODE = 5 #The type of a {@link LexerPushModeAction} action. - SKIP = 6 #The type of a {@link LexerSkipAction} action. - TYPE = 7 #The type of a {@link LexerTypeAction} action. - -class LexerAction(object): - - def __init__(self, action): - self.actionType = action - self.isPositionDependent = False - - def __hash__(self): - return hash(self.actionType) - - def __eq__(self, other): - return self is other - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return unicode(super(LexerAction, self)) - - -# -# Implements the {@code skip} lexer action by calling {@link Lexer#skip}. -# -#

      The {@code skip} command does not have any parameters, so this action is -# implemented as a singleton instance exposed by {@link #INSTANCE}.

      -class LexerSkipAction(LexerAction ): - - # Provides a singleton instance of this parameterless lexer action. - INSTANCE = None - - def __init__(self): - super(LexerSkipAction, self).__init__(LexerActionType.SKIP) - - def execute(self, lexer): - lexer.skip() - - def __unicode__(self): - return u"skip" - -LexerSkipAction.INSTANCE = LexerSkipAction() - -# Implements the {@code type} lexer action by calling {@link Lexer#setType} -# with the assigned type. -class LexerTypeAction(LexerAction): - - def __init__(self, type): - super(LexerTypeAction, self).__init__(LexerActionType.TYPE) - self.type = type - - def execute(self, lexer): - lexer.type = self.type - - def __hash__(self): - return hash((self.actionType, self.type)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerTypeAction): - return False - else: - return self.type == other.type - - def __unicode__(self): - return u"type(" + unicode(self.type) + u")" - - -# Implements the {@code pushMode} lexer action by calling -# {@link Lexer#pushMode} with the assigned mode. -class LexerPushModeAction(LexerAction): - - def __init__(self, mode): - super(LexerPushModeAction, self).__init__(LexerActionType.PUSH_MODE) - self.mode = mode - - #

      This action is implemented by calling {@link Lexer#pushMode} with the - # value provided by {@link #getMode}.

      - def execute(self, lexer): - lexer.pushMode(self.mode) - - def __hash__(self): - return hash((self.actionType, self.mode)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerPushModeAction): - return False - else: - return self.mode == other.mode - - def __unicode__(self): - return u"pushMode(" + unicode(self.mode) + u")" - - -# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}. -# -#

      The {@code popMode} command does not have any parameters, so this action is -# implemented as a singleton instance exposed by {@link #INSTANCE}.

      -class LexerPopModeAction(LexerAction): - - INSTANCE = None - - def __init__(self): - super(LexerPopModeAction, self).__init__(LexerActionType.POP_MODE) - - #

      This action is implemented by calling {@link Lexer#popMode}.

      - def execute(self, lexer): - lexer.popMode() - - def __unicode__(self): - return "popMode" - -LexerPopModeAction.INSTANCE = LexerPopModeAction() - -# Implements the {@code more} lexer action by calling {@link Lexer#more}. -# -#

      The {@code more} command does not have any parameters, so this action is -# implemented as a singleton instance exposed by {@link #INSTANCE}.

      -class LexerMoreAction(LexerAction): - - INSTANCE = None - - def __init__(self): - super(LexerMoreAction, self).__init__(LexerActionType.MORE) - - #

      This action is implemented by calling {@link Lexer#popMode}.

      - def execute(self, lexer): - lexer.more() - - def __unicode__(self): - return "more" - -LexerMoreAction.INSTANCE = LexerMoreAction() - -# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with -# the assigned mode. -class LexerModeAction(LexerAction): - - def __init__(self, mode): - super(LexerModeAction, self).__init__(LexerActionType.MODE) - self.mode = mode - - #

      This action is implemented by calling {@link Lexer#mode} with the - # value provided by {@link #getMode}.

      - def execute(self, lexer): - lexer.mode(self.mode) - - def __hash__(self): - return hash((self.actionType, self.mode)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerModeAction): - return False - else: - return self.mode == other.mode - - def __unicode__(self): - return u"mode(" + unicode(self.mode) + u")" - -# Executes a custom lexer action by calling {@link Recognizer#action} with the -# rule and action indexes assigned to the custom action. The implementation of -# a custom action is added to the generated code for the lexer in an override -# of {@link Recognizer#action} when the grammar is compiled. -# -#

      This class may represent embedded actions created with the {...} -# syntax in ANTLR 4, as well as actions created for lexer commands where the -# command argument could not be evaluated when the grammar was compiled.

      - -class LexerCustomAction(LexerAction): - - # Constructs a custom lexer action with the specified rule and action - # indexes. - # - # @param ruleIndex The rule index to use for calls to - # {@link Recognizer#action}. - # @param actionIndex The action index to use for calls to - # {@link Recognizer#action}. - #/ - def __init__(self, ruleIndex, actionIndex): - super(LexerCustomAction, self).__init__(LexerActionType.CUSTOM) - self.ruleIndex = ruleIndex - self.actionIndex = actionIndex - self.isPositionDependent = True - - #

      Custom actions are implemented by calling {@link Lexer#action} with the - # appropriate rule and action indexes.

      - def execute(self, lexer): - lexer.action(None, self.ruleIndex, self.actionIndex) - - def __hash__(self): - return hash((self.actionType, self.ruleIndex, self.actionIndex)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerCustomAction): - return False - else: - return self.ruleIndex == other.ruleIndex and self.actionIndex == other.actionIndex - -# Implements the {@code channel} lexer action by calling -# {@link Lexer#setChannel} with the assigned channel. -class LexerChannelAction(LexerAction): - - # Constructs a new {@code channel} action with the specified channel value. - # @param channel The channel value to pass to {@link Lexer#setChannel}. - def __init__(self, channel): - super(LexerChannelAction, self).__init__(LexerActionType.CHANNEL) - self.channel = channel - - #

      This action is implemented by calling {@link Lexer#setChannel} with the - # value provided by {@link #getChannel}.

      - def execute(self, lexer): - lexer._channel = self.channel - - def __hash__(self): - return hash((self.actionType, self.channel)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerChannelAction): - return False - else: - return self.channel == other.channel - - def __unicode__(self): - return u"channel(" + unicode(self.channel) + u")" - -# This implementation of {@link LexerAction} is used for tracking input offsets -# for position-dependent actions within a {@link LexerActionExecutor}. -# -#

      This action is not serialized as part of the ATN, and is only required for -# position-dependent lexer actions which appear at a location other than the -# end of a rule. For more information about DFA optimizations employed for -# lexer actions, see {@link LexerActionExecutor#append} and -# {@link LexerActionExecutor#fixOffsetBeforeMatch}.

      -class LexerIndexedCustomAction(LexerAction): - - # Constructs a new indexed custom action by associating a character offset - # with a {@link LexerAction}. - # - #

      Note: This class is only required for lexer actions for which - # {@link LexerAction#isPositionDependent} returns {@code true}.

      - # - # @param offset The offset into the input {@link CharStream}, relative to - # the token start index, at which the specified lexer action should be - # executed. - # @param action The lexer action to execute at a particular offset in the - # input {@link CharStream}. - def __init__(self, offset, action): - super(LexerIndexedCustomAction, self).__init__(action.actionType) - self.offset = offset - self.action = action - self.isPositionDependent = True - - #

      This method calls {@link #execute} on the result of {@link #getAction} - # using the provided {@code lexer}.

      - def execute(self, lexer): - # assume the input stream position was properly set by the calling code - self.action.execute(lexer) - - def __hash__(self): - return hash((self.actionType, self.offset, self.action)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerIndexedCustomAction): - return False - else: - return self.offset == other.offset and self.action == other.action diff --git a/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py b/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py deleted file mode 100644 index 45f894c757..0000000000 --- a/runtime/Python2/src/antlr4/atn/LexerActionExecutor.py +++ /dev/null @@ -1,135 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# Represents an executor for a sequence of lexer actions which traversed during -# the matching operation of a lexer rule (token). -# -#

      The executor tracks position information for position-dependent lexer actions -# efficiently, ensuring that actions appearing only at the end of the rule do -# not cause bloating of the {@link DFA} created for the lexer.

      - - -from antlr4.atn.LexerAction import LexerIndexedCustomAction - -class LexerActionExecutor(object): - - def __init__(self, lexerActions=list()): - self.lexerActions = lexerActions - # Caches the result of {@link #hashCode} since the hash code is an element - # of the performance-critical {@link LexerATNConfig#hashCode} operation. - self.hashCode = hash("".join([str(la) for la in lexerActions])) - - - # Creates a {@link LexerActionExecutor} which executes the actions for - # the input {@code lexerActionExecutor} followed by a specified - # {@code lexerAction}. - # - # @param lexerActionExecutor The executor for actions already traversed by - # the lexer while matching a token within a particular - # {@link LexerATNConfig}. If this is {@code null}, the method behaves as - # though it were an empty executor. - # @param lexerAction The lexer action to execute after the actions - # specified in {@code lexerActionExecutor}. - # - # @return A {@link LexerActionExecutor} for executing the combine actions - # of {@code lexerActionExecutor} and {@code lexerAction}. - @staticmethod - def append(lexerActionExecutor, lexerAction): - if lexerActionExecutor is None: - return LexerActionExecutor([ lexerAction ]) - - lexerActions = lexerActionExecutor.lexerActions + [ lexerAction ] - return LexerActionExecutor(lexerActions) - - # Creates a {@link LexerActionExecutor} which encodes the current offset - # for position-dependent lexer actions. - # - #

      Normally, when the executor encounters lexer actions where - # {@link LexerAction#isPositionDependent} returns {@code true}, it calls - # {@link IntStream#seek} on the input {@link CharStream} to set the input - # position to the end of the current token. This behavior provides - # for efficient DFA representation of lexer actions which appear at the end - # of a lexer rule, even when the lexer rule matches a variable number of - # characters.

      - # - #

      Prior to traversing a match transition in the ATN, the current offset - # from the token start index is assigned to all position-dependent lexer - # actions which have not already been assigned a fixed offset. By storing - # the offsets relative to the token start index, the DFA representation of - # lexer actions which appear in the middle of tokens remains efficient due - # to sharing among tokens of the same length, regardless of their absolute - # position in the input stream.

      - # - #

      If the current executor already has offsets assigned to all - # position-dependent lexer actions, the method returns {@code this}.

      - # - # @param offset The current offset to assign to all position-dependent - # lexer actions which do not already have offsets assigned. - # - # @return A {@link LexerActionExecutor} which stores input stream offsets - # for all position-dependent lexer actions. - #/ - def fixOffsetBeforeMatch(self, offset): - updatedLexerActions = None - for i in range(0, len(self.lexerActions)): - if self.lexerActions[i].isPositionDependent and not isinstance(self.lexerActions[i], LexerIndexedCustomAction): - if updatedLexerActions is None: - updatedLexerActions = [ la for la in self.lexerActions ] - updatedLexerActions[i] = LexerIndexedCustomAction(offset, self.lexerActions[i]) - - if updatedLexerActions is None: - return self - else: - return LexerActionExecutor(updatedLexerActions) - - - # Execute the actions encapsulated by this executor within the context of a - # particular {@link Lexer}. - # - #

      This method calls {@link IntStream#seek} to set the position of the - # {@code input} {@link CharStream} prior to calling - # {@link LexerAction#execute} on a position-dependent action. Before the - # method returns, the input position will be restored to the same position - # it was in when the method was invoked.

      - # - # @param lexer The lexer instance. - # @param input The input stream which is the source for the current token. - # When this method is called, the current {@link IntStream#index} for - # {@code input} should be the start of the following token, i.e. 1 - # character past the end of the current token. - # @param startIndex The token start index. This value may be passed to - # {@link IntStream#seek} to set the {@code input} position to the beginning - # of the token. - #/ - def execute(self, lexer, input, startIndex): - requiresSeek = False - stopIndex = input.index - try: - for lexerAction in self.lexerActions: - if isinstance(lexerAction, LexerIndexedCustomAction): - offset = lexerAction.offset - input.seek(startIndex + offset) - lexerAction = lexerAction.action - requiresSeek = (startIndex + offset) != stopIndex - elif lexerAction.isPositionDependent: - input.seek(stopIndex) - requiresSeek = False - lexerAction.execute(lexer) - finally: - if requiresSeek: - input.seek(stopIndex) - - def __hash__(self): - return self.hashCode - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, LexerActionExecutor): - return False - else: - return self.hashCode == other.hashCode \ - and self.lexerActions == other.lexerActions diff --git a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py b/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py deleted file mode 100755 index 3cb1431761..0000000000 --- a/runtime/Python2/src/antlr4/atn/ParserATNSimulator.py +++ /dev/null @@ -1,1659 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# The embodiment of the adaptive LL(*), ALL(*), parsing strategy. -# -#

      -# The basic complexity of the adaptive strategy makes it harder to understand. -# We begin with ATN simulation to build paths in a DFA. Subsequent prediction -# requests go through the DFA first. If they reach a state without an edge for -# the current symbol, the algorithm fails over to the ATN simulation to -# complete the DFA path for the current input (until it finds a conflict state -# or uniquely predicting state).

      -# -#

      -# All of that is done without using the outer context because we want to create -# a DFA that is not dependent upon the rule invocation stack when we do a -# prediction. One DFA works in all contexts. We avoid using context not -# necessarily because it's slower, although it can be, but because of the DFA -# caching problem. The closure routine only considers the rule invocation stack -# created during prediction beginning in the decision rule. For example, if -# prediction occurs without invoking another rule's ATN, there are no context -# stacks in the configurations. When lack of context leads to a conflict, we -# don't know if it's an ambiguity or a weakness in the strong LL(*) parsing -# strategy (versus full LL(*)).

      -# -#

      -# When SLL yields a configuration set with conflict, we rewind the input and -# retry the ATN simulation, this time using full outer context without adding -# to the DFA. Configuration context stacks will be the full invocation stacks -# from the start rule. If we get a conflict using full context, then we can -# definitively say we have a true ambiguity for that input sequence. If we -# don't get a conflict, it implies that the decision is sensitive to the outer -# context. (It is not context-sensitive in the sense of context-sensitive -# grammars.)

      -# -#

      -# The next time we reach this DFA state with an SLL conflict, through DFA -# simulation, we will again retry the ATN simulation using full context mode. -# This is slow because we can't save the results and have to "interpret" the -# ATN each time we get that input.

      -# -#

      -# CACHING FULL CONTEXT PREDICTIONS

      -# -#

      -# We could cache results from full context to predicted alternative easily and -# that saves a lot of time but doesn't work in presence of predicates. The set -# of visible predicates from the ATN start state changes depending on the -# context, because closure can fall off the end of a rule. I tried to cache -# tuples (stack context, semantic context, predicted alt) but it was slower -# than interpreting and much more complicated. Also required a huge amount of -# memory. The goal is not to create the world's fastest parser anyway. I'd like -# to keep this algorithm simple. By launching multiple threads, we can improve -# the speed of parsing across a large number of files.

      -# -#

      -# There is no strict ordering between the amount of input used by SLL vs LL, -# which makes it really hard to build a cache for full context. Let's say that -# we have input A B C that leads to an SLL conflict with full context X. That -# implies that using X we might only use A B but we could also use A B C D to -# resolve conflict. Input A B C D could predict alternative 1 in one position -# in the input and A B C E could predict alternative 2 in another position in -# input. The conflicting SLL configurations could still be non-unique in the -# full context prediction, which would lead us to requiring more input than the -# original A B C. To make a prediction cache work, we have to track the exact -# input used during the previous prediction. That amounts to a cache that maps -# X to a specific DFA for that context.

      -# -#

      -# Something should be done for left-recursive expression predictions. They are -# likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry -# with full LL thing Sam does.

      -# -#

      -# AVOIDING FULL CONTEXT PREDICTION

      -# -#

      -# We avoid doing full context retry when the outer context is empty, we did not -# dip into the outer context by falling off the end of the decision state rule, -# or when we force SLL mode.

      -# -#

      -# As an example of the not dip into outer context case, consider as super -# constructor calls versus function calls. One grammar might look like -# this:

      -# -#
      -# ctorBody
      -#   : '{' superCall? stat* '}'
      -#   ;
      -# 
      -# -#

      -# Or, you might see something like

      -# -#
      -# stat
      -#   : superCall ';'
      -#   | expression ';'
      -#   | ...
      -#   ;
      -# 
      -# -#

      -# In both cases I believe that no closure operations will dip into the outer -# context. In the first case ctorBody in the worst case will stop at the '}'. -# In the 2nd case it should stop at the ';'. Both cases should stay within the -# entry rule and not dip into the outer context.

      -# -#

      -# PREDICATES

      -# -#

      -# Predicates are always evaluated if present in either SLL or LL both. SLL and -# LL simulation deals with predicates differently. SLL collects predicates as -# it performs closure operations like ANTLR v3 did. It delays predicate -# evaluation until it reaches and accept state. This allows us to cache the SLL -# ATN simulation whereas, if we had evaluated predicates on-the-fly during -# closure, the DFA state configuration sets would be different and we couldn't -# build up a suitable DFA.

      -# -#

      -# When building a DFA accept state during ATN simulation, we evaluate any -# predicates and return the sole semantically valid alternative. If there is -# more than 1 alternative, we report an ambiguity. If there are 0 alternatives, -# we throw an exception. Alternatives without predicates act like they have -# true predicates. The simple way to think about it is to strip away all -# alternatives with false predicates and choose the minimum alternative that -# remains.

      -# -#

      -# When we start in the DFA and reach an accept state that's predicated, we test -# those and return the minimum semantically viable alternative. If no -# alternatives are viable, we throw an exception.

      -# -#

      -# During full LL ATN simulation, closure always evaluates predicates and -# on-the-fly. This is crucial to reducing the configuration set size during -# closure. It hits a landmine when parsing with the Java grammar, for example, -# without this on-the-fly evaluation.

      -# -#

      -# SHARING DFA

      -# -#

      -# All instances of the same parser share the same decision DFAs through a -# static field. Each instance gets its own ATN simulator but they share the -# same {@link #decisionToDFA} field. They also share a -# {@link PredictionContextCache} object that makes sure that all -# {@link PredictionContext} objects are shared among the DFA states. This makes -# a big size difference.

      -# -#

      -# THREAD SAFETY

      -# -#

      -# The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when -# it adds a new DFA object to that array. {@link #addDFAEdge} -# locks on the DFA for the current decision when setting the -# {@link DFAState#edges} field. {@link #addDFAState} locks on -# the DFA for the current decision when looking up a DFA state to see if it -# already exists. We must make sure that all requests to add DFA states that -# are equivalent result in the same shared DFA object. This is because lots of -# threads will be trying to update the DFA at once. The -# {@link #addDFAState} method also locks inside the DFA lock -# but this time on the shared context cache when it rebuilds the -# configurations' {@link PredictionContext} objects using cached -# subgraphs/nodes. No other locking occurs, even during DFA simulation. This is -# safe as long as we can guarantee that all threads referencing -# {@code s.edge[t]} get the same physical target {@link DFAState}, or -# {@code null}. Once into the DFA, the DFA simulation does not reference the -# {@link DFA#states} map. It follows the {@link DFAState#edges} field to new -# targets. The DFA simulator will either find {@link DFAState#edges} to be -# {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or -# {@code dfa.edges[t]} to be non-null. The -# {@link #addDFAEdge} method could be racing to set the field -# but in either case the DFA simulator works; if {@code null}, and requests ATN -# simulation. It could also race trying to get {@code dfa.edges[t]}, but either -# way it will work because it's not doing a test and set operation.

      -# -#

      -# Starting with SLL then failing to combined SLL/LL (Two-Stage -# Parsing)

      -# -#

      -# Sam pointed out that if SLL does not give a syntax error, then there is no -# point in doing full LL, which is slower. We only have to try LL if we get a -# syntax error. For maximum speed, Sam starts the parser set to pure SLL -# mode with the {@link BailErrorStrategy}:

      -# -#
      -# parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
      -# parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
      -# 
      -# -#

      -# If it does not get a syntax error, then we're done. If it does get a syntax -# error, we need to retry with the combined SLL/LL strategy.

      -# -#

      -# The reason this works is as follows. If there are no SLL conflicts, then the -# grammar is SLL (at least for that input set). If there is an SLL conflict, -# the full LL analysis must yield a set of viable alternatives which is a -# subset of the alternatives reported by SLL. If the LL set is a singleton, -# then the grammar is LL but not SLL. If the LL set is the same size as the SLL -# set, the decision is SLL. If the LL set has size > 1, then that decision -# is truly ambiguous on the current input. If the LL set is smaller, then the -# SLL conflict resolution might choose an alternative that the full LL would -# rule out as a possibility based upon better context information. If that's -# the case, then the SLL parse will definitely get an error because the full LL -# analysis says it's not viable. If SLL conflict resolution chooses an -# alternative within the LL set, them both SLL and LL would choose the same -# alternative because they both choose the minimum of multiple conflicting -# alternatives.

      -# -#

      -# Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and -# a smaller LL set called s. If s is {@code {2, 3}}, then SLL -# parsing will get an error because SLL will pursue alternative 1. If -# s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will -# choose the same alternative because alternative one is the minimum of either -# set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax -# error. If s is {@code {1}} then SLL will succeed.

      -# -#

      -# Of course, if the input is invalid, then we will get an error for sure in -# both SLL and LL parsing. Erroneous input will therefore require 2 passes over -# the input.

      -# -from __future__ import print_function - -import sys -from antlr4.ParserRuleContext import ParserRuleContext -from antlr4.PredictionContext import PredictionContext, SingletonPredictionContext, PredictionContextFromRuleContext -from antlr4.Token import Token -from antlr4.Utils import str_list -from antlr4.atn.ATN import ATN -from antlr4.atn.ATNConfig import ATNConfig -from antlr4.atn.ATNConfigSet import ATNConfigSet -from antlr4.atn.ATNSimulator import ATNSimulator -from antlr4.atn.ATNState import RuleStopState, ATNState -from antlr4.atn.PredictionMode import PredictionMode -from antlr4.atn.SemanticContext import SemanticContext, andContext, orContext -from antlr4.atn.Transition import Transition, RuleTransition, ActionTransition, AtomTransition, SetTransition, \ - NotSetTransition -from antlr4.dfa.DFAState import DFAState, PredPrediction -from antlr4.error.Errors import NoViableAltException - - -class ParserATNSimulator(ATNSimulator): - - debug = False - trace_atn_sim = False - dfa_debug = False - retry_debug = False - - - def __init__(self, parser, atn, decisionToDFA, sharedContextCache): - super(ParserATNSimulator, self).__init__(atn, sharedContextCache) - self.parser = parser - self.decisionToDFA = decisionToDFA - # SLL, LL, or LL + exact ambig detection?# - self.predictionMode = PredictionMode.LL - # LAME globals to avoid parameters!!!!! I need these down deep in predTransition - self._input = None - self._startIndex = 0 - self._outerContext = None - self._dfa = None - # Each prediction operation uses a cache for merge of prediction contexts. - # Don't keep around as it wastes huge amounts of memory. DoubleKeyMap - # isn't synchronized but we're ok since two threads shouldn't reuse same - # parser/atnsim object because it can only handle one input at a time. - # This maps graphs a and b to merged result c. (a,b)→c. We can avoid - # the merge if we ever see a and b again. Note that (b,a)→c should - # also be examined during cache lookup. - # - self.mergeCache = None - - - def reset(self): - pass - - def adaptivePredict(self, input, decision, outerContext): - if ParserATNSimulator.debug or ParserATNSimulator.trace_atn_sim: - print("adaptivePredict decision " + str(decision) + - " exec LA(1)==" + self.getLookaheadName(input) + - " line " + str(input.LT(1).line) + ":" + - str(input.LT(1).column)) - self._input = input - self._startIndex = input.index - self._outerContext = outerContext - - dfa = self.decisionToDFA[decision] - self._dfa = dfa - m = input.mark() - index = input.index - - # Now we are certain to have a specific decision's DFA - # But, do we still need an initial state? - try: - if dfa.precedenceDfa: - # the start state for a precedence DFA depends on the current - # parser precedence, and is provided by a DFA method. - s0 = dfa.getPrecedenceStartState(self.parser.getPrecedence()) - else: - # the start state for a "regular" DFA is just s0 - s0 = dfa.s0 - - if s0 is None: - if outerContext is None: - outerContext = ParserRuleContext.EMPTY - if ParserATNSimulator.debug: - print("predictATN decision " + str(dfa.decision) + - " exec LA(1)==" + self.getLookaheadName(input) + - ", outerContext=" + str(outerContext));#outerContext.toString(self.parser.literalNames, None)) - - fullCtx = False - s0_closure = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, fullCtx) - - if dfa.precedenceDfa: - # If this is a precedence DFA, we use applyPrecedenceFilter - # to convert the computed start state to a precedence start - # state. We then use DFA.setPrecedenceStartState to set the - # appropriate start state for the precedence level rather - # than simply setting DFA.s0. - # - dfa.s0.configs = s0_closure # not used for prediction but useful to know start configs anyway - s0_closure = self.applyPrecedenceFilter(s0_closure) - s0 = self.addDFAState(dfa, DFAState(configs=s0_closure)) - dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0) - else: - s0 = self.addDFAState(dfa, DFAState(configs=s0_closure)) - dfa.s0 = s0 - - alt = self.execATN(dfa, s0, input, index, outerContext) - if ParserATNSimulator.debug: - print("DFA after predictATN: " + dfa.toString(self.parser.literalNames)) - return alt - finally: - self._dfa = None - self.mergeCache = None # wack cache after each prediction - input.seek(index) - input.release(m) - - # Performs ATN simulation to compute a predicted alternative based - # upon the remaining input, but also updates the DFA cache to avoid - # having to traverse the ATN again for the same input sequence. - - # There are some key conditions we're looking for after computing a new - # set of ATN configs (proposed DFA state): - # if the set is empty, there is no viable alternative for current symbol - # does the state uniquely predict an alternative? - # does the state have a conflict that would prevent us from - # putting it on the work list? - - # We also have some key operations to do: - # add an edge from previous DFA state to potentially new DFA state, D, - # upon current symbol but only if adding to work list, which means in all - # cases except no viable alternative (and possibly non-greedy decisions?) - # collecting predicates and adding semantic context to DFA accept states - # adding rule context to context-sensitive DFA accept states - # consuming an input symbol - # reporting a conflict - # reporting an ambiguity - # reporting a context sensitivity - # reporting insufficient predicates - - # cover these cases: - # dead end - # single alt - # single alt + preds - # conflict - # conflict + preds - # - def execATN(self, dfa, s0, input, startIndex, outerContext ): - if ParserATNSimulator.debug or ParserATNSimulator.trace_atn_sim: - print("execATN decision " + str(dfa.decision) + - ", DFA state " + str(s0) + - ", LA(1)==" + self.getLookaheadName(input) + - " line " + str(input.LT(1).line) + ":" + str(input.LT(1).column)) - - previousD = s0 - - if ParserATNSimulator.debug: - print("s0 = " + str(s0)) - - t = input.LA(1) - - while True: # while more work - D = self.getExistingTargetState(previousD, t) - if D is None: - D = self.computeTargetState(dfa, previousD, t) - if D is self.ERROR: - # if any configs in previous dipped into outer context, that - # means that input up to t actually finished entry rule - # at least for SLL decision. Full LL doesn't dip into outer - # so don't need special case. - # We will get an error no matter what so delay until after - # decision; better error message. Also, no reachable target - # ATN states in SLL implies LL will also get nowhere. - # If conflict in states that dip out, choose min since we - # will get error no matter what. - e = self.noViableAlt(input, outerContext, previousD.configs, startIndex) - input.seek(startIndex) - alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext) - if alt!=ATN.INVALID_ALT_NUMBER: - return alt - raise e - - if D.requiresFullContext and self.predictionMode != PredictionMode.SLL: - # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) - conflictingAlts = D.configs.conflictingAlts - if D.predicates is not None: - if ParserATNSimulator.debug: - print("DFA state has preds in DFA sim LL failover") - conflictIndex = input.index - if conflictIndex != startIndex: - input.seek(startIndex) - - conflictingAlts = self.evalSemanticContext(D.predicates, outerContext, True) - if len(conflictingAlts)==1: - if ParserATNSimulator.debug: - print("Full LL avoided") - return min(conflictingAlts) - - if conflictIndex != startIndex: - # restore the index so reporting the fallback to full - # context occurs with the index at the correct spot - input.seek(conflictIndex) - - if ParserATNSimulator.dfa_debug: - print("ctx sensitive state " + str(outerContext) +" in " + str(D)) - fullCtx = True - s0_closure = self.computeStartState(dfa.atnStartState, outerContext, fullCtx) - self.reportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.index) - alt = self.execATNWithFullContext(dfa, D, s0_closure, input, startIndex, outerContext) - return alt - - if D.isAcceptState: - if D.predicates is None: - return D.prediction - - stopIndex = input.index - input.seek(startIndex) - alts = self.evalSemanticContext(D.predicates, outerContext, True) - if len(alts)==0: - raise self.noViableAlt(input, outerContext, D.configs, startIndex) - elif len(alts)==1: - return min(alts) - else: - # report ambiguity after predicate evaluation to make sure the correct - # set of ambig alts is reported. - self.reportAmbiguity(dfa, D, startIndex, stopIndex, False, alts, D.configs) - return min(alts) - - previousD = D - - if t != Token.EOF: - input.consume() - t = input.LA(1) - - # - # Get an existing target state for an edge in the DFA. If the target state - # for the edge has not yet been computed or is otherwise not available, - # this method returns {@code null}. - # - # @param previousD The current DFA state - # @param t The next input symbol - # @return The existing target DFA state for the given input symbol - # {@code t}, or {@code null} if the target state for this edge is not - # already cached - # - def getExistingTargetState(self, previousD, t): - edges = previousD.edges - if edges is None or t + 1 < 0 or t + 1 >= len(edges): - return None - else: - return edges[t + 1] - - # - # Compute a target state for an edge in the DFA, and attempt to add the - # computed state and corresponding edge to the DFA. - # - # @param dfa The DFA - # @param previousD The current DFA state - # @param t The next input symbol - # - # @return The computed target DFA state for the given input symbol - # {@code t}. If {@code t} does not lead to a valid DFA state, this method - # returns {@link #ERROR}. - # - def computeTargetState(self, dfa, previousD, t): - reach = self.computeReachSet(previousD.configs, t, False) - if reach is None: - self.addDFAEdge(dfa, previousD, t, self.ERROR) - return self.ERROR - - # create new target state; we'll add to DFA after it's complete - D = DFAState(configs=reach) - - predictedAlt = self.getUniqueAlt(reach) - - if ParserATNSimulator.debug: - altSubSets = PredictionMode.getConflictingAltSubsets(reach) - print("SLL altSubSets=" + str(altSubSets) + ", configs=" + str(reach) + - ", predict=" + str(predictedAlt) + ", allSubsetsConflict=" + - str(PredictionMode.allSubsetsConflict(altSubSets)) + ", conflictingAlts=" + - str(self.getConflictingAlts(reach))) - - if predictedAlt!=ATN.INVALID_ALT_NUMBER: - # NO CONFLICT, UNIQUELY PREDICTED ALT - D.isAcceptState = True - D.configs.uniqueAlt = predictedAlt - D.prediction = predictedAlt - elif PredictionMode.hasSLLConflictTerminatingPrediction(self.predictionMode, reach): - # MORE THAN ONE VIABLE ALTERNATIVE - D.configs.conflictingAlts = self.getConflictingAlts(reach) - D.requiresFullContext = True - # in SLL-only mode, we will stop at this state and return the minimum alt - D.isAcceptState = True - D.prediction = min(D.configs.conflictingAlts) - - if D.isAcceptState and D.configs.hasSemanticContext: - self.predicateDFAState(D, self.atn.getDecisionState(dfa.decision)) - if D.predicates is not None: - D.prediction = ATN.INVALID_ALT_NUMBER - - # all adds to dfa are done after we've created full D state - D = self.addDFAEdge(dfa, previousD, t, D) - return D - - def predicateDFAState(self, dfaState, decisionState): - # We need to test all predicates, even in DFA states that - # uniquely predict alternative. - nalts = len(decisionState.transitions) - # Update DFA so reach becomes accept state with (predicate,alt) - # pairs if preds found for conflicting alts - altsToCollectPredsFrom = self.getConflictingAltsOrUniqueAlt(dfaState.configs) - altToPred = self.getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts) - if altToPred is not None: - dfaState.predicates = self.getPredicatePredictions(altsToCollectPredsFrom, altToPred) - dfaState.prediction = ATN.INVALID_ALT_NUMBER # make sure we use preds - else: - # There are preds in configs but they might go away - # when OR'd together like {p}? || NONE == NONE. If neither - # alt has preds, resolve to min alt - dfaState.prediction = min(altsToCollectPredsFrom) - - # comes back with reach.uniqueAlt set to a valid alt - def execATNWithFullContext(self, dfa, D, # how far we got before failing over - s0, - input, - startIndex, - outerContext): - if ParserATNSimulator.debug or ParserATNSimulator.trace_atn_sim: - print("execATNWithFullContext", str(s0)) - fullCtx = True - foundExactAmbig = False - reach = None - previous = s0 - input.seek(startIndex) - t = input.LA(1) - predictedAlt = -1 - while (True): # while more work - reach = self.computeReachSet(previous, t, fullCtx) - if reach is None: - # if any configs in previous dipped into outer context, that - # means that input up to t actually finished entry rule - # at least for LL decision. Full LL doesn't dip into outer - # so don't need special case. - # We will get an error no matter what so delay until after - # decision; better error message. Also, no reachable target - # ATN states in SLL implies LL will also get nowhere. - # If conflict in states that dip out, choose min since we - # will get error no matter what. - e = self.noViableAlt(input, outerContext, previous, startIndex) - input.seek(startIndex) - alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext) - if alt!=ATN.INVALID_ALT_NUMBER: - return alt - else: - raise e - - altSubSets = PredictionMode.getConflictingAltSubsets(reach) - if ParserATNSimulator.debug: - print("LL altSubSets=" + str(altSubSets) + ", predict=" + - str(PredictionMode.getUniqueAlt(altSubSets)) + ", resolvesToJustOneViableAlt=" + - str(PredictionMode.resolvesToJustOneViableAlt(altSubSets))) - - reach.uniqueAlt = self.getUniqueAlt(reach) - # unique prediction? - if reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER: - predictedAlt = reach.uniqueAlt - break - elif self.predictionMode is not PredictionMode.LL_EXACT_AMBIG_DETECTION: - predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets) - if predictedAlt != ATN.INVALID_ALT_NUMBER: - break - else: - # In exact ambiguity mode, we never try to terminate early. - # Just keeps scarfing until we know what the conflict is - if PredictionMode.allSubsetsConflict(altSubSets) and PredictionMode.allSubsetsEqual(altSubSets): - foundExactAmbig = True - predictedAlt = PredictionMode.getSingleViableAlt(altSubSets) - break - # else there are multiple non-conflicting subsets or - # we're not sure what the ambiguity is yet. - # So, keep going. - - previous = reach - if t != Token.EOF: - input.consume() - t = input.LA(1) - - # If the configuration set uniquely predicts an alternative, - # without conflict, then we know that it's a full LL decision - # not SLL. - if reach.uniqueAlt != ATN.INVALID_ALT_NUMBER : - self.reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index) - return predictedAlt - - # We do not check predicates here because we have checked them - # on-the-fly when doing full context prediction. - - # - # In non-exact ambiguity detection mode, we might actually be able to - # detect an exact ambiguity, but I'm not going to spend the cycles - # needed to check. We only emit ambiguity warnings in exact ambiguity - # mode. - # - # For example, we might know that we have conflicting configurations. - # But, that does not mean that there is no way forward without a - # conflict. It's possible to have nonconflicting alt subsets as in: - - # altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] - - # from - # - # [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), - # (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] - # - # In this case, (17,1,[5 $]) indicates there is some next sequence that - # would resolve this without conflict to alternative 1. Any other viable - # next sequence, however, is associated with a conflict. We stop - # looking for input because no amount of further lookahead will alter - # the fact that we should predict alternative 1. We just can't say for - # sure that there is an ambiguity without looking further. - - self.reportAmbiguity(dfa, D, startIndex, input.index, foundExactAmbig, None, reach) - - return predictedAlt - - def computeReachSet(self, closure, t, fullCtx): - if ParserATNSimulator.debug: - print("in computeReachSet, starting closure: " + str(closure)) - - if self.mergeCache is None: - self.mergeCache = dict() - - intermediate = ATNConfigSet(fullCtx) - - # Configurations already in a rule stop state indicate reaching the end - # of the decision rule (local context) or end of the start rule (full - # context). Once reached, these configurations are never updated by a - # closure operation, so they are handled separately for the performance - # advantage of having a smaller intermediate set when calling closure. - # - # For full-context reach operations, separate handling is required to - # ensure that the alternative matching the longest overall sequence is - # chosen when multiple such configurations can match the input. - - skippedStopStates = None - - # First figure out where we can reach on input t - for c in closure: - if ParserATNSimulator.debug: - print("testing " + self.getTokenName(t) + " at " + str(c)) - - if isinstance(c.state, RuleStopState): - if fullCtx or t == Token.EOF: - if skippedStopStates is None: - skippedStopStates = list() - skippedStopStates.append(c) - continue - - for trans in c.state.transitions: - target = self.getReachableTarget(trans, t) - if target is not None: - intermediate.add(ATNConfig(state=target, config=c), self.mergeCache) - - # Now figure out where the reach operation can take us... - - reach = None - - # This block optimizes the reach operation for intermediate sets which - # trivially indicate a termination state for the overall - # adaptivePredict operation. - # - # The conditions assume that intermediate - # contains all configurations relevant to the reach set, but this - # condition is not true when one or more configurations have been - # withheld in skippedStopStates , or when the current symbol is EOF. - # - if skippedStopStates is None and t!=Token.EOF: - if len(intermediate)==1: - # Don't pursue the closure if there is just one state. - # It can only have one alternative; just add to result - # Also don't pursue the closure if there is unique alternative - # among the configurations. - reach = intermediate - elif self.getUniqueAlt(intermediate)!=ATN.INVALID_ALT_NUMBER: - # Also don't pursue the closure if there is unique alternative - # among the configurations. - reach = intermediate - - # If the reach set could not be trivially determined, perform a closure - # operation on the intermediate set to compute its initial value. - # - if reach is None: - reach = ATNConfigSet(fullCtx) - closureBusy = set() - treatEofAsEpsilon = t == Token.EOF - for c in intermediate: - self.closure(c, reach, closureBusy, False, fullCtx, treatEofAsEpsilon) - - if t == Token.EOF: - # After consuming EOF no additional input is possible, so we are - # only interested in configurations which reached the end of the - # decision rule (local context) or end of the start rule (full - # context). Update reach to contain only these configurations. This - # handles both explicit EOF transitions in the grammar and implicit - # EOF transitions following the end of the decision or start rule. - # - # When reach==intermediate, no closure operation was performed. In - # this case, removeAllConfigsNotInRuleStopState needs to check for - # reachable rule stop states as well as configurations already in - # a rule stop state. - # - # This is handled before the configurations in skippedStopStates, - # because any configurations potentially added from that list are - # already guaranteed to meet this condition whether or not it's - # required. - # - reach = self.removeAllConfigsNotInRuleStopState(reach, reach is intermediate) - - # If skippedStopStates is not null, then it contains at least one - # configuration. For full-context reach operations, these - # configurations reached the end of the start rule, in which case we - # only add them back to reach if no configuration during the current - # closure operation reached such a state. This ensures adaptivePredict - # chooses an alternative matching the longest overall sequence when - # multiple alternatives are viable. - # - if skippedStopStates is not None and ( (not fullCtx) or (not PredictionMode.hasConfigInRuleStopState(reach))): - for c in skippedStopStates: - reach.add(c, self.mergeCache) - - if ParserATNSimulator.trace_atn_sim: - print("computeReachSet", str(closure), "->", reach) - - if len(reach)==0: - return None - else: - return reach - - # - # Return a configuration set containing only the configurations from - # {@code configs} which are in a {@link RuleStopState}. If all - # configurations in {@code configs} are already in a rule stop state, this - # method simply returns {@code configs}. - # - #

      When {@code lookToEndOfRule} is true, this method uses - # {@link ATN#nextTokens} for each configuration in {@code configs} which is - # not already in a rule stop state to see if a rule stop state is reachable - # from the configuration via epsilon-only transitions.

      - # - # @param configs the configuration set to update - # @param lookToEndOfRule when true, this method checks for rule stop states - # reachable by epsilon-only transitions from each configuration in - # {@code configs}. - # - # @return {@code configs} if all configurations in {@code configs} are in a - # rule stop state, otherwise return a new configuration set containing only - # the configurations from {@code configs} which are in a rule stop state - # - def removeAllConfigsNotInRuleStopState(self, configs, lookToEndOfRule): - if PredictionMode.allConfigsInRuleStopStates(configs): - return configs - result = ATNConfigSet(configs.fullCtx) - for config in configs: - if isinstance(config.state, RuleStopState): - result.add(config, self.mergeCache) - continue - if lookToEndOfRule and config.state.epsilonOnlyTransitions: - nextTokens = self.atn.nextTokens(config.state) - if Token.EPSILON in nextTokens: - endOfRuleState = self.atn.ruleToStopState[config.state.ruleIndex] - result.add(ATNConfig(state=endOfRuleState, config=config), self.mergeCache) - return result - - def computeStartState(self, p, ctx, fullCtx): - # always at least the implicit call to start rule - initialContext = PredictionContextFromRuleContext(self.atn, ctx) - configs = ATNConfigSet(fullCtx) - - if ParserATNSimulator.trace_atn_sim: - print("computeStartState from ATN state "+str(p)+ - " initialContext="+str(initialContext)) - - for i in range(0, len(p.transitions)): - target = p.transitions[i].target - c = ATNConfig(target, i+1, initialContext) - closureBusy = set() - self.closure(c, configs, closureBusy, True, fullCtx, False) - return configs - - # - # This method transforms the start state computed by - # {@link #computeStartState} to the special start state used by a - # precedence DFA for a particular precedence value. The transformation - # process applies the following changes to the start state's configuration - # set. - # - #
        - #
      1. Evaluate the precedence predicates for each configuration using - # {@link SemanticContext#evalPrecedence}.
      2. - #
      3. Remove all configurations which predict an alternative greater than - # 1, for which another configuration that predicts alternative 1 is in the - # same ATN state with the same prediction context. This transformation is - # valid for the following reasons: - #
          - #
        • The closure block cannot contain any epsilon transitions which bypass - # the body of the closure, so all states reachable via alternative 1 are - # part of the precedence alternatives of the transformed left-recursive - # rule.
        • - #
        • The "primary" portion of a left recursive rule cannot contain an - # epsilon transition, so the only way an alternative other than 1 can exist - # in a state that is also reachable via alternative 1 is by nesting calls - # to the left-recursive rule, with the outer calls not being at the - # preferred precedence level.
        • - #
        - #
      4. - #
      - # - #

      - # The prediction context must be considered by this filter to address - # situations like the following. - #

      - # - #
      -    # grammar TA;
      -    # prog: statement* EOF;
      -    # statement: letterA | statement letterA 'b' ;
      -    # letterA: 'a';
      -    # 
      - #
      - #

      - # If the above grammar, the ATN state immediately before the token - # reference {@code 'a'} in {@code letterA} is reachable from the left edge - # of both the primary and closure blocks of the left-recursive rule - # {@code statement}. The prediction context associated with each of these - # configurations distinguishes between them, and prevents the alternative - # which stepped out to {@code prog} (and then back in to {@code statement} - # from being eliminated by the filter. - #

      - # - # @param configs The configuration set computed by - # {@link #computeStartState} as the start state for the DFA. - # @return The transformed configuration set representing the start state - # for a precedence DFA at a particular precedence level (determined by - # calling {@link Parser#getPrecedence}). - # - def applyPrecedenceFilter(self, configs): - statesFromAlt1 = dict() - configSet = ATNConfigSet(configs.fullCtx) - for config in configs: - # handle alt 1 first - if config.alt != 1: - continue - updatedContext = config.semanticContext.evalPrecedence(self.parser, self._outerContext) - if updatedContext is None: - # the configuration was eliminated - continue - - statesFromAlt1[config.state.stateNumber] = config.context - if updatedContext is not config.semanticContext: - configSet.add(ATNConfig(config=config, semantic=updatedContext), self.mergeCache) - else: - configSet.add(config, self.mergeCache) - - for config in configs: - if config.alt == 1: - # already handled - continue - - # In the future, this elimination step could be updated to also - # filter the prediction context for alternatives predicting alt>1 - # (basically a graph subtraction algorithm). - # - if not config.precedenceFilterSuppressed: - context = statesFromAlt1.get(config.state.stateNumber, None) - if context==config.context: - # eliminated - continue - - configSet.add(config, self.mergeCache) - - return configSet - - def getReachableTarget(self, trans, ttype): - if trans.matches(ttype, 0, self.atn.maxTokenType): - return trans.target - else: - return None - - def getPredsForAmbigAlts(self, ambigAlts, configs, nalts): - # REACH=[1|1|[]|0:0, 1|2|[]|0:1] - # altToPred starts as an array of all null contexts. The entry at index i - # corresponds to alternative i. altToPred[i] may have one of three values: - # 1. null: no ATNConfig c is found such that c.alt==i - # 2. SemanticContext.NONE: At least one ATNConfig c exists such that - # c.alt==i and c.semanticContext==SemanticContext.NONE. In other words, - # alt i has at least one unpredicated config. - # 3. Non-NONE Semantic Context: There exists at least one, and for all - # ATNConfig c such that c.alt==i, c.semanticContext!=SemanticContext.NONE. - # - # From this, it is clear that NONE||anything==NONE. - # - altToPred = [None] * (nalts + 1) - for c in configs: - if c.alt in ambigAlts: - altToPred[c.alt] = orContext(altToPred[c.alt], c.semanticContext) - - nPredAlts = 0 - for i in range(1, nalts+1): - if altToPred[i] is None: - altToPred[i] = SemanticContext.NONE - elif altToPred[i] is not SemanticContext.NONE: - nPredAlts += 1 - - # nonambig alts are null in altToPred - if nPredAlts==0: - altToPred = None - if ParserATNSimulator.debug: - print("getPredsForAmbigAlts result " + str_list(altToPred)) - return altToPred - - def getPredicatePredictions(self, ambigAlts, altToPred): - pairs = [] - containsPredicate = False - for i in range(1, len(altToPred)): - pred = altToPred[i] - # unpredicated is indicated by SemanticContext.NONE - if ambigAlts is not None and i in ambigAlts: - pairs.append(PredPrediction(pred, i)) - if pred is not SemanticContext.NONE: - containsPredicate = True - - if not containsPredicate: - return None - - return pairs - - # - # This method is used to improve the localization of error messages by - # choosing an alternative rather than throwing a - # {@link NoViableAltException} in particular prediction scenarios where the - # {@link #ERROR} state was reached during ATN simulation. - # - #

      - # The default implementation of this method uses the following - # algorithm to identify an ATN configuration which successfully parsed the - # decision entry rule. Choosing such an alternative ensures that the - # {@link ParserRuleContext} returned by the calling rule will be complete - # and valid, and the syntax error will be reported later at a more - # localized location.

      - # - #
        - #
      • If a syntactically valid path or paths reach the end of the decision rule and - # they are semantically valid if predicated, return the min associated alt.
      • - #
      • Else, if a semantically invalid but syntactically valid path exist - # or paths exist, return the minimum associated alt. - #
      • - #
      • Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.
      • - #
      - # - #

      - # In some scenarios, the algorithm described above could predict an - # alternative which will result in a {@link FailedPredicateException} in - # the parser. Specifically, this could occur if the only configuration - # capable of successfully parsing to the end of the decision rule is - # blocked by a semantic predicate. By choosing this alternative within - # {@link #adaptivePredict} instead of throwing a - # {@link NoViableAltException}, the resulting - # {@link FailedPredicateException} in the parser will identify the specific - # predicate which is preventing the parser from successfully parsing the - # decision rule, which helps developers identify and correct logic errors - # in semantic predicates. - #

      - # - # @param configs The ATN configurations which were valid immediately before - # the {@link #ERROR} state was reached - # @param outerContext The is the \gamma_0 initial parser context from the paper - # or the parser stack at the instant before prediction commences. - # - # @return The value to return from {@link #adaptivePredict}, or - # {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not - # identified and {@link #adaptivePredict} should report an error instead. - # - def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(self, configs, outerContext): - semValidConfigs, semInvalidConfigs = self.splitAccordingToSemanticValidity(configs, outerContext) - alt = self.getAltThatFinishedDecisionEntryRule(semValidConfigs) - if alt!=ATN.INVALID_ALT_NUMBER: # semantically/syntactically viable path exists - return alt - # Is there a syntactically valid path with a failed pred? - if len(semInvalidConfigs)>0: - alt = self.getAltThatFinishedDecisionEntryRule(semInvalidConfigs) - if alt!=ATN.INVALID_ALT_NUMBER: # syntactically viable path exists - return alt - return ATN.INVALID_ALT_NUMBER - - def getAltThatFinishedDecisionEntryRule(self, configs): - alts = set() - for c in configs: - if c.reachesIntoOuterContext>0 or (isinstance(c.state, RuleStopState) and c.context.hasEmptyPath() ): - alts.add(c.alt) - if len(alts)==0: - return ATN.INVALID_ALT_NUMBER - else: - return min(alts) - - # Walk the list of configurations and split them according to - # those that have preds evaluating to true/false. If no pred, assume - # true pred and include in succeeded set. Returns Pair of sets. - # - # Create a new set so as not to alter the incoming parameter. - # - # Assumption: the input stream has been restored to the starting point - # prediction, which is where predicates need to evaluate. - # - def splitAccordingToSemanticValidity(self, configs, outerContext): - succeeded = ATNConfigSet(configs.fullCtx) - failed = ATNConfigSet(configs.fullCtx) - for c in configs: - if c.semanticContext is not SemanticContext.NONE: - predicateEvaluationResult = c.semanticContext.eval(self.parser, outerContext) - if predicateEvaluationResult: - succeeded.add(c) - else: - failed.add(c) - else: - succeeded.add(c) - return (succeeded,failed) - - # Look through a list of predicate/alt pairs, returning alts for the - # pairs that win. A {@code NONE} predicate indicates an alt containing an - # unpredicated config which behaves as "always true." If !complete - # then we stop at the first predicate that evaluates to true. This - # includes pairs with null predicates. - # - def evalSemanticContext(self, predPredictions, outerContext, complete): - predictions = set() - for pair in predPredictions: - if pair.pred is SemanticContext.NONE: - predictions.add(pair.alt) - if not complete: - break - continue - predicateEvaluationResult = pair.pred.eval(self.parser, outerContext) - if ParserATNSimulator.debug or ParserATNSimulator.dfa_debug: - print("eval pred " + str(pair) + "=" + str(predicateEvaluationResult)) - - if predicateEvaluationResult: - if ParserATNSimulator.debug or ParserATNSimulator.dfa_debug: - print("PREDICT " + str(pair.alt)) - predictions.add(pair.alt) - if not complete: - break - return predictions - - - # TODO: If we are doing predicates, there is no point in pursuing - # closure operations if we reach a DFA state that uniquely predicts - # alternative. We will not be caching that DFA state and it is a - # waste to pursue the closure. Might have to advance when we do - # ambig detection thought :( - # - - def closure(self, config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon): - initialDepth = 0; - self.closureCheckingStopState(config, configs, closureBusy, collectPredicates, - fullCtx, initialDepth, treatEofAsEpsilon) - - - def closureCheckingStopState(self, config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon): - if ParserATNSimulator.trace_atn_sim: - print("closure(" + str(config) + ")") - - if isinstance(config.state, RuleStopState): - # We hit rule end. If we have context info, use it - # run thru all possible stack tops in ctx - if not config.context.isEmpty(): - for i in range(0, len(config.context)): - state = config.context.getReturnState(i) - if state is PredictionContext.EMPTY_RETURN_STATE: - if fullCtx: - configs.add(ATNConfig(state=config.state, context=PredictionContext.EMPTY, config=config), self.mergeCache) - continue - else: - # we have no context info, just chase follow links (if greedy) - if ParserATNSimulator.debug: - print("FALLING off rule " + self.getRuleName(config.state.ruleIndex)) - self.closure_(config, configs, closureBusy, collectPredicates, - fullCtx, depth, treatEofAsEpsilon) - continue - returnState = self.atn.states[state] - newContext = config.context.getParent(i) # "pop" return state - c = ATNConfig(state=returnState, alt=config.alt, context=newContext, semantic=config.semanticContext) - # While we have context to pop back from, we may have - # gotten that context AFTER having falling off a rule. - # Make sure we track that we are now out of context. - c.reachesIntoOuterContext = config.reachesIntoOuterContext - self.closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth - 1, treatEofAsEpsilon) - return - elif fullCtx: - # reached end of start rule - configs.add(config, self.mergeCache) - return - else: - # else if we have no context info, just chase follow links (if greedy) - if ParserATNSimulator.debug: - print("FALLING off rule " + self.getRuleName(config.state.ruleIndex)) - - self.closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon) - - # Do the actual work of walking epsilon edges# - def closure_(self, config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon): - p = config.state - # optimization - if not p.epsilonOnlyTransitions: - configs.add(config, self.mergeCache) - # make sure to not return here, because EOF transitions can act as - # both epsilon transitions and non-epsilon transitions. - - first = True - for t in p.transitions: - if first: - first = False - if self.canDropLoopEntryEdgeInLeftRecursiveRule(config): - continue - - continueCollecting = collectPredicates and not isinstance(t, ActionTransition) - c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon) - if c is not None: - newDepth = depth - if isinstance( config.state, RuleStopState): - # target fell off end of rule; mark resulting c as having dipped into outer context - # We can't get here if incoming config was rule stop and we had context - # track how far we dip into outer context. Might - # come in handy and we avoid evaluating context dependent - # preds if this is > 0. - - if self._dfa is not None and self._dfa.precedenceDfa: - if t.outermostPrecedenceReturn == self._dfa.atnStartState.ruleIndex: - c.precedenceFilterSuppressed = True - c.reachesIntoOuterContext += 1 - if c in closureBusy: - # avoid infinite recursion for right-recursive rules - continue - closureBusy.add(c) - configs.dipsIntoOuterContext = True # TODO: can remove? only care when we add to set per middle of this method - newDepth -= 1 - if ParserATNSimulator.debug: - print("dips into outer ctx: " + str(c)) - else: - if not t.isEpsilon: - if c in closureBusy: - # avoid infinite recursion for EOF* and EOF+ - continue - closureBusy.add(c) - if isinstance(t, RuleTransition): - # latch when newDepth goes negative - once we step out of the entry context we can't return - if newDepth >= 0: - newDepth += 1 - - self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon) - - - - # Implements first-edge (loop entry) elimination as an optimization - # during closure operations. See antlr/antlr4#1398. - # - # The optimization is to avoid adding the loop entry config when - # the exit path can only lead back to the same - # StarLoopEntryState after popping context at the rule end state - # (traversing only epsilon edges, so we're still in closure, in - # this same rule). - # - # We need to detect any state that can reach loop entry on - # epsilon w/o exiting rule. We don't have to look at FOLLOW - # links, just ensure that all stack tops for config refer to key - # states in LR rule. - # - # To verify we are in the right situation we must first check - # closure is at a StarLoopEntryState generated during LR removal. - # Then we check that each stack top of context is a return state - # from one of these cases: - # - # 1. 'not' expr, '(' type ')' expr. The return state points at loop entry state - # 2. expr op expr. The return state is the block end of internal block of (...)* - # 3. 'between' expr 'and' expr. The return state of 2nd expr reference. - # That state points at block end of internal block of (...)*. - # 4. expr '?' expr ':' expr. The return state points at block end, - # which points at loop entry state. - # - # If any is true for each stack top, then closure does not add a - # config to the current config set for edge[0], the loop entry branch. - # - # Conditions fail if any context for the current config is: - # - # a. empty (we'd fall out of expr to do a global FOLLOW which could - # even be to some weird spot in expr) or, - # b. lies outside of expr or, - # c. lies within expr but at a state not the BlockEndState - # generated during LR removal - # - # Do we need to evaluate predicates ever in closure for this case? - # - # No. Predicates, including precedence predicates, are only - # evaluated when computing a DFA start state. I.e., only before - # the lookahead (but not parser) consumes a token. - # - # There are no epsilon edges allowed in LR rule alt blocks or in - # the "primary" part (ID here). If closure is in - # StarLoopEntryState any lookahead operation will have consumed a - # token as there are no epsilon-paths that lead to - # StarLoopEntryState. We do not have to evaluate predicates - # therefore if we are in the generated StarLoopEntryState of a LR - # rule. Note that when making a prediction starting at that - # decision point, decision d=2, compute-start-state performs - # closure starting at edges[0], edges[1] emanating from - # StarLoopEntryState. That means it is not performing closure on - # StarLoopEntryState during compute-start-state. - # - # How do we know this always gives same prediction answer? - # - # Without predicates, loop entry and exit paths are ambiguous - # upon remaining input +b (in, say, a+b). Either paths lead to - # valid parses. Closure can lead to consuming + immediately or by - # falling out of this call to expr back into expr and loop back - # again to StarLoopEntryState to match +b. In this special case, - # we choose the more efficient path, which is to take the bypass - # path. - # - # The lookahead language has not changed because closure chooses - # one path over the other. Both paths lead to consuming the same - # remaining input during a lookahead operation. If the next token - # is an operator, lookahead will enter the choice block with - # operators. If it is not, lookahead will exit expr. Same as if - # closure had chosen to enter the choice block immediately. - # - # Closure is examining one config (some loopentrystate, some alt, - # context) which means it is considering exactly one alt. Closure - # always copies the same alt to any derived configs. - # - # How do we know this optimization doesn't mess up precedence in - # our parse trees? - # - # Looking through expr from left edge of stat only has to confirm - # that an input, say, a+b+c; begins with any valid interpretation - # of an expression. The precedence actually doesn't matter when - # making a decision in stat seeing through expr. It is only when - # parsing rule expr that we must use the precedence to get the - # right interpretation and, hence, parse tree. - # - # @since 4.6 - # - def canDropLoopEntryEdgeInLeftRecursiveRule(self, config): - # return False - p = config.state - # First check to see if we are in StarLoopEntryState generated during - # left-recursion elimination. For efficiency, also check if - # the context has an empty stack case. If so, it would mean - # global FOLLOW so we can't perform optimization - # Are we the special loop entry/exit state? or SLL wildcard - if p.stateType != ATNState.STAR_LOOP_ENTRY \ - or not p.isPrecedenceDecision \ - or config.context.isEmpty() \ - or config.context.hasEmptyPath(): - return False - - # Require all return states to return back to the same rule - # that p is in. - numCtxs = len(config.context) - for i in range(0, numCtxs): # for each stack context - returnState = self.atn.states[config.context.getReturnState(i)] - if returnState.ruleIndex != p.ruleIndex: - return False - - decisionStartState = p.transitions[0].target - blockEndStateNum = decisionStartState.endState.stateNumber - blockEndState = self.atn.states[blockEndStateNum] - - # Verify that the top of each stack context leads to loop entry/exit - # state through epsilon edges and w/o leaving rule. - for i in range(0, numCtxs): # for each stack context - returnStateNumber = config.context.getReturnState(i) - returnState = self.atn.states[returnStateNumber] - # all states must have single outgoing epsilon edge - if len(returnState.transitions) != 1 or not returnState.transitions[0].isEpsilon: - return False - - # Look for prefix op case like 'not expr', (' type ')' expr - returnStateTarget = returnState.transitions[0].target - if returnState.stateType == ATNState.BLOCK_END and returnStateTarget is p: - continue - - # Look for 'expr op expr' or case where expr's return state is block end - # of (...)* internal block; the block end points to loop back - # which points to p but we don't need to check that - if returnState is blockEndState: - continue - - # Look for ternary expr ? expr : expr. The return state points at block end, - # which points at loop entry state - if returnStateTarget is blockEndState: - continue - - # Look for complex prefix 'between expr and expr' case where 2nd expr's - # return state points at block end state of (...)* internal block - if returnStateTarget.stateType == ATNState.BLOCK_END \ - and len(returnStateTarget.transitions) == 1 \ - and returnStateTarget.transitions[0].isEpsilon \ - and returnStateTarget.transitions[0].target is p: - continue - - # anything else ain't conforming - return False - - return True - - - def getRuleName(self, index): - if self.parser is not None and index>=0: - return self.parser.ruleNames[index] - else: - return "" - - epsilonTargetMethods = dict() - epsilonTargetMethods[Transition.RULE] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - sim.ruleTransition(config, t) - epsilonTargetMethods[Transition.PRECEDENCE] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - sim.precedenceTransition(config, t, collectPredicates, inContext, fullCtx) - epsilonTargetMethods[Transition.PREDICATE] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - sim.predTransition(config, t, collectPredicates, inContext, fullCtx) - epsilonTargetMethods[Transition.ACTION] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - sim.actionTransition(config, t) - epsilonTargetMethods[Transition.EPSILON] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - ATNConfig(state=t.target, config=config) - epsilonTargetMethods[Transition.ATOM] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - ATNConfig(state=t.target, config=config) if treatEofAsEpsilon and t.matches(Token.EOF, 0, 1) else None - epsilonTargetMethods[Transition.RANGE] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - ATNConfig(state=t.target, config=config) if treatEofAsEpsilon and t.matches(Token.EOF, 0, 1) else None - epsilonTargetMethods[Transition.SET] = lambda sim, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon: \ - ATNConfig(state=t.target, config=config) if treatEofAsEpsilon and t.matches(Token.EOF, 0, 1) else None - - def getEpsilonTarget(self, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon): - m = self.epsilonTargetMethods.get(t.serializationType, None) - if m is None: - return None - else: - return m(self, config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon) - - def actionTransition(self, config, t): - if ParserATNSimulator.debug: - print("ACTION edge " + str(t.ruleIndex) + ":" + str(t.actionIndex)) - return ATNConfig(state=t.target, config=config) - - def precedenceTransition(self, config, pt, collectPredicates, inContext, fullCtx): - if ParserATNSimulator.debug: - print("PRED (collectPredicates=" + str(collectPredicates) + ") " + - str(pt.precedence) + ">=_p, ctx dependent=true") - if self.parser is not None: - print("context surrounding pred is " + str(self.parser.getRuleInvocationStack())) - - c = None - if collectPredicates and inContext: - if fullCtx: - # In full context mode, we can evaluate predicates on-the-fly - # during closure, which dramatically reduces the size of - # the config sets. It also obviates the need to test predicates - # later during conflict resolution. - currentPosition = self._input.index - self._input.seek(self._startIndex) - predSucceeds = pt.getPredicate().eval(self.parser, self._outerContext) - self._input.seek(currentPosition) - if predSucceeds: - c = ATNConfig(state=pt.target, config=config) # no pred context - else: - newSemCtx = andContext(config.semanticContext, pt.getPredicate()) - c = ATNConfig(state=pt.target, semantic=newSemCtx, config=config) - else: - c = ATNConfig(state=pt.target, config=config) - - if ParserATNSimulator.debug: - print("config from pred transition=" + str(c)) - return c - - def predTransition(self, config, pt, collectPredicates, inContext, fullCtx): - if ParserATNSimulator.debug: - print("PRED (collectPredicates=" + str(collectPredicates) + ") " + str(pt.ruleIndex) + - ":" + str(pt.predIndex) + ", ctx dependent=" + str(pt.isCtxDependent)) - if self.parser is not None: - print("context surrounding pred is " + str(self.parser.getRuleInvocationStack())) - - c = None - if collectPredicates and (not pt.isCtxDependent or (pt.isCtxDependent and inContext)): - if fullCtx: - # In full context mode, we can evaluate predicates on-the-fly - # during closure, which dramatically reduces the size of - # the config sets. It also obviates the need to test predicates - # later during conflict resolution. - currentPosition = self._input.index - self._input.seek(self._startIndex) - predSucceeds = pt.getPredicate().eval(self.parser, self._outerContext) - self._input.seek(currentPosition) - if predSucceeds: - c = ATNConfig(state=pt.target, config=config) # no pred context - else: - newSemCtx = andContext(config.semanticContext, pt.getPredicate()) - c = ATNConfig(state=pt.target, semantic=newSemCtx, config=config) - else: - c = ATNConfig(state=pt.target, config=config) - - if ParserATNSimulator.debug: - print("config from pred transition=" + str(c)) - return c - - def ruleTransition(self, config, t): - if ParserATNSimulator.debug: - print("CALL rule " + self.getRuleName(t.target.ruleIndex) + ", ctx=" + str(config.context)) - returnState = t.followState - newContext = SingletonPredictionContext.create(config.context, returnState.stateNumber) - return ATNConfig(state=t.target, context=newContext, config=config ) - - def getConflictingAlts(self, configs): - altsets = PredictionMode.getConflictingAltSubsets(configs) - return PredictionMode.getAlts(altsets) - - # Sam pointed out a problem with the previous definition, v3, of - # ambiguous states. If we have another state associated with conflicting - # alternatives, we should keep going. For example, the following grammar - # - # s : (ID | ID ID?) ';' ; - # - # When the ATN simulation reaches the state before ';', it has a DFA - # state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally - # 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node - # because alternative to has another way to continue, via [6|2|[]]. - # The key is that we have a single state that has config's only associated - # with a single alternative, 2, and crucially the state transitions - # among the configurations are all non-epsilon transitions. That means - # we don't consider any conflicts that include alternative 2. So, we - # ignore the conflict between alts 1 and 2. We ignore a set of - # conflicting alts when there is an intersection with an alternative - # associated with a single alt state in the state→config-list map. - # - # It's also the case that we might have two conflicting configurations but - # also a 3rd nonconflicting configuration for a different alternative: - # [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: - # - # a : A | A | A B ; - # - # After matching input A, we reach the stop state for rule A, state 1. - # State 8 is the state right before B. Clearly alternatives 1 and 2 - # conflict and no amount of further lookahead will separate the two. - # However, alternative 3 will be able to continue and so we do not - # stop working on this state. In the previous example, we're concerned - # with states associated with the conflicting alternatives. Here alt - # 3 is not associated with the conflicting configs, but since we can continue - # looking for input reasonably, I don't declare the state done. We - # ignore a set of conflicting alts when we have an alternative - # that we still need to pursue. - # - - def getConflictingAltsOrUniqueAlt(self, configs): - conflictingAlts = None - if configs.uniqueAlt!= ATN.INVALID_ALT_NUMBER: - conflictingAlts = set() - conflictingAlts.add(configs.uniqueAlt) - else: - conflictingAlts = configs.conflictingAlts - return conflictingAlts - - def getTokenName(self, t): - if t==Token.EOF: - return u"EOF" - if self.parser is not None and \ - self.parser.literalNames is not None and \ - t < len(self.parser.literalNames): - return self.parser.literalNames[t] + u"<" + unicode(t) + ">" - if self.parser is not None and \ - self.parser.symbolicNames is not None and \ - t < len(self.parser.symbolicNames): - return self.parser.symbolicNames[t] + u"<" + unicode(t) + ">" - else: - return unicode(t) - - def getLookaheadName(self, input): - return self.getTokenName(input.LA(1)) - - # Used for debugging in adaptivePredict around execATN but I cut - # it out for clarity now that alg. works well. We can leave this - # "dead" code for a bit. - # - def dumpDeadEndConfigs(self, nvae): - print("dead end configs: ") - for c in nvae.getDeadEndConfigs(): - trans = "no edges" - if len(c.state.transitions)>0: - t = c.state.transitions[0] - if isinstance(t, AtomTransition): - trans = "Atom "+ self.getTokenName(t.label) - elif isinstance(t, SetTransition): - neg = isinstance(t, NotSetTransition) - trans = ("~" if neg else "")+"Set "+ str(t.set) - print(c.toString(self.parser, True) + ":" + trans, file=sys.stderr) - - def noViableAlt(self, input, outerContext, configs, startIndex): - return NoViableAltException(self.parser, input, input.get(startIndex), input.LT(1), configs, outerContext) - - def getUniqueAlt(self, configs): - alt = ATN.INVALID_ALT_NUMBER - for c in configs: - if alt == ATN.INVALID_ALT_NUMBER: - alt = c.alt # found first alt - elif c.alt!=alt: - return ATN.INVALID_ALT_NUMBER - return alt - - # - # Add an edge to the DFA, if possible. This method calls - # {@link #addDFAState} to ensure the {@code to} state is present in the - # DFA. If {@code from} is {@code null}, or if {@code t} is outside the - # range of edges that can be represented in the DFA tables, this method - # returns without adding the edge to the DFA. - # - #

      If {@code to} is {@code null}, this method returns {@code null}. - # Otherwise, this method returns the {@link DFAState} returned by calling - # {@link #addDFAState} for the {@code to} state.

      - # - # @param dfa The DFA - # @param from The source state for the edge - # @param t The input symbol - # @param to The target state for the edge - # - # @return If {@code to} is {@code null}, this method returns {@code null}; - # otherwise this method returns the result of calling {@link #addDFAState} - # on {@code to} - # - def addDFAEdge(self, dfa, from_, t, to): - if ParserATNSimulator.debug: - print("EDGE " + str(from_) + " -> " + str(to) + " upon " + self.getTokenName(t)) - - if to is None: - return None - - to = self.addDFAState(dfa, to) # used existing if possible not incoming - if from_ is None or t < -1 or t > self.atn.maxTokenType: - return to - - if from_.edges is None: - from_.edges = [None] * (self.atn.maxTokenType + 2) - from_.edges[t+1] = to # connect - - if ParserATNSimulator.debug: - names = None if self.parser is None else self.parser.literalNames - print("DFA=\n" + dfa.toString(names)) - - return to - - # - # Add state {@code D} to the DFA if it is not already present, and return - # the actual instance stored in the DFA. If a state equivalent to {@code D} - # is already in the DFA, the existing state is returned. Otherwise this - # method returns {@code D} after adding it to the DFA. - # - #

      If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and - # does not change the DFA.

      - # - # @param dfa The dfa - # @param D The DFA state to add - # @return The state stored in the DFA. This will be either the existing - # state if {@code D} is already in the DFA, or {@code D} itself if the - # state was not already present. - # - def addDFAState(self, dfa, D): - if D is self.ERROR: - return D - - - existing = dfa.states.get(D, None) - if existing is not None: - if ParserATNSimulator.trace_atn_sim: print("addDFAState", str(D), "exists") - return existing - - D.stateNumber = len(dfa.states) - if not D.configs.readonly: - D.configs.optimizeConfigs(self) - D.configs.setReadonly(True) - dfa.states[D] = D - - if ParserATNSimulator.trace_atn_sim: print("addDFAState new", str(D)) - - return D - - def reportAttemptingFullContext(self, dfa, conflictingAlts, configs, startIndex, stopIndex): - if ParserATNSimulator.debug or ParserATNSimulator.retry_debug: - print("reportAttemptingFullContext decision=" + str(dfa.decision) + ":" + str(configs) + - ", input=" + self.parser.getTokenStream().getText(startIndex, stopIndex + 1)) - if self.parser is not None: - self.parser.getErrorListenerDispatch().reportAttemptingFullContext(self.parser, dfa, startIndex, stopIndex, conflictingAlts, configs) - - def reportContextSensitivity(self, dfa, prediction, configs, startIndex, stopIndex): - if ParserATNSimulator.debug or ParserATNSimulator.retry_debug: - print("reportContextSensitivity decision=" + str(dfa.decision) + ":" + str(configs) + - ", input=" + self.parser.getTokenStream().getText(startIndex, stopIndex + 1)) - if self.parser is not None: - self.parser.getErrorListenerDispatch().reportContextSensitivity(self.parser, dfa, startIndex, stopIndex, prediction, configs) - - # If context sensitive parsing, we know it's ambiguity not conflict# - def reportAmbiguity(self, dfa, D, startIndex, stopIndex, - exact, ambigAlts, configs ): - if ParserATNSimulator.debug or ParserATNSimulator.retry_debug: -# ParserATNPathFinder finder = new ParserATNPathFinder(parser, atn); -# int i = 1; -# for (Transition t : dfa.atnStartState.transitions) { -# print("ALT "+i+"="); -# print(startIndex+".."+stopIndex+", len(input)="+parser.getInputStream().size()); -# TraceTree path = finder.trace(t.target, parser.getContext(), (TokenStream)parser.getInputStream(), -# startIndex, stopIndex); -# if ( path!=null ) { -# print("path = "+path.toStringTree()); -# for (TraceTree leaf : path.leaves) { -# List states = path.getPathToNode(leaf); -# print("states="+states); -# } -# } -# i++; -# } - print("reportAmbiguity " + str(ambigAlts) + ":" + str(configs) + - ", input=" + self.parser.getTokenStream().getText(startIndex, stopIndex + 1)) - if self.parser is not None: - self.parser.getErrorListenerDispatch().reportAmbiguity(self.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs) - diff --git a/runtime/Python2/src/antlr4/atn/PredictionMode.py b/runtime/Python2/src/antlr4/atn/PredictionMode.py deleted file mode 100644 index d0658b5537..0000000000 --- a/runtime/Python2/src/antlr4/atn/PredictionMode.py +++ /dev/null @@ -1,495 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# -# -# This enumeration defines the prediction modes available in ANTLR 4 along with -# utility methods for analyzing configuration sets for conflicts and/or -# ambiguities. - - -from antlr4.atn.ATN import ATN -from antlr4.atn.ATNConfig import ATNConfig -from antlr4.atn.ATNConfigSet import ATNConfigSet -from antlr4.atn.ATNState import RuleStopState -from antlr4.atn.SemanticContext import SemanticContext - -class PredictionMode(object): - # - # The SLL(*) prediction mode. This prediction mode ignores the current - # parser context when making predictions. This is the fastest prediction - # mode, and provides correct results for many grammars. This prediction - # mode is more powerful than the prediction mode provided by ANTLR 3, but - # may result in syntax errors for grammar and input combinations which are - # not SLL. - # - #

      - # When using this prediction mode, the parser will either return a correct - # parse tree (i.e. the same parse tree that would be returned with the - # {@link #LL} prediction mode), or it will report a syntax error. If a - # syntax error is encountered when using the {@link #SLL} prediction mode, - # it may be due to either an actual syntax error in the input or indicate - # that the particular combination of grammar and input requires the more - # powerful {@link #LL} prediction abilities to complete successfully.

      - # - #

      - # This prediction mode does not provide any guarantees for prediction - # behavior for syntactically-incorrect inputs.

      - # - SLL = 0 - # - # The LL(*) prediction mode. This prediction mode allows the current parser - # context to be used for resolving SLL conflicts that occur during - # prediction. This is the fastest prediction mode that guarantees correct - # parse results for all combinations of grammars with syntactically correct - # inputs. - # - #

      - # When using this prediction mode, the parser will make correct decisions - # for all syntactically-correct grammar and input combinations. However, in - # cases where the grammar is truly ambiguous this prediction mode might not - # report a precise answer for exactly which alternatives are - # ambiguous.

      - # - #

      - # This prediction mode does not provide any guarantees for prediction - # behavior for syntactically-incorrect inputs.

      - # - LL = 1 - # - # The LL(*) prediction mode with exact ambiguity detection. In addition to - # the correctness guarantees provided by the {@link #LL} prediction mode, - # this prediction mode instructs the prediction algorithm to determine the - # complete and exact set of ambiguous alternatives for every ambiguous - # decision encountered while parsing. - # - #

      - # This prediction mode may be used for diagnosing ambiguities during - # grammar development. Due to the performance overhead of calculating sets - # of ambiguous alternatives, this prediction mode should be avoided when - # the exact results are not necessary.

      - # - #

      - # This prediction mode does not provide any guarantees for prediction - # behavior for syntactically-incorrect inputs.

      - # - LL_EXACT_AMBIG_DETECTION = 2 - - - # - # Computes the SLL prediction termination condition. - # - #

      - # This method computes the SLL prediction termination condition for both of - # the following cases.

      - # - #
        - #
      • The usual SLL+LL fallback upon SLL conflict
      • - #
      • Pure SLL without LL fallback
      • - #
      - # - #

      COMBINED SLL+LL PARSING

      - # - #

      When LL-fallback is enabled upon SLL conflict, correct predictions are - # ensured regardless of how the termination condition is computed by this - # method. Due to the substantially higher cost of LL prediction, the - # prediction should only fall back to LL when the additional lookahead - # cannot lead to a unique SLL prediction.

      - # - #

      Assuming combined SLL+LL parsing, an SLL configuration set with only - # conflicting subsets should fall back to full LL, even if the - # configuration sets don't resolve to the same alternative (e.g. - # {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting - # configuration, SLL could continue with the hopes that more lookahead will - # resolve via one of those non-conflicting configurations.

      - # - #

      Here's the prediction termination rule them: SLL (for SLL+LL parsing) - # stops when it sees only conflicting configuration subsets. In contrast, - # full LL keeps going when there is uncertainty.

      - # - #

      HEURISTIC

      - # - #

      As a heuristic, we stop prediction when we see any conflicting subset - # unless we see a state that only has one alternative associated with it. - # The single-alt-state thing lets prediction continue upon rules like - # (otherwise, it would admit defeat too soon):

      - # - #

      {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}

      - # - #

      When the ATN simulation reaches the state before {@code ';'}, it has a - # DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally - # {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop - # processing this node because alternative to has another way to continue, - # via {@code [6|2|[]]}.

      - # - #

      It also let's us continue for this rule:

      - # - #

      {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}

      - # - #

      After matching input A, we reach the stop state for rule A, state 1. - # State 8 is the state right before B. Clearly alternatives 1 and 2 - # conflict and no amount of further lookahead will separate the two. - # However, alternative 3 will be able to continue and so we do not stop - # working on this state. In the previous example, we're concerned with - # states associated with the conflicting alternatives. Here alt 3 is not - # associated with the conflicting configs, but since we can continue - # looking for input reasonably, don't declare the state done.

      - # - #

      PURE SLL PARSING

      - # - #

      To handle pure SLL parsing, all we have to do is make sure that we - # combine stack contexts for configurations that differ only by semantic - # predicate. From there, we can do the usual SLL termination heuristic.

      - # - #

      PREDICATES IN SLL+LL PARSING

      - # - #

      SLL decisions don't evaluate predicates until after they reach DFA stop - # states because they need to create the DFA cache that works in all - # semantic situations. In contrast, full LL evaluates predicates collected - # during start state computation so it can ignore predicates thereafter. - # This means that SLL termination detection can totally ignore semantic - # predicates.

      - # - #

      Implementation-wise, {@link ATNConfigSet} combines stack contexts but not - # semantic predicate contexts so we might see two configurations like the - # following.

      - # - #

      {@code (s, 1, x, {}), (s, 1, x', {p})}

      - # - #

      Before testing these configurations against others, we have to merge - # {@code x} and {@code x'} (without modifying the existing configurations). - # For example, we test {@code (x+x')==x''} when looking for conflicts in - # the following configurations.

      - # - #

      {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}

      - # - #

      If the configuration set has predicates (as indicated by - # {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of - # the configurations to strip out all of the predicates so that a standard - # {@link ATNConfigSet} will merge everything ignoring predicates.

      - # - @classmethod - def hasSLLConflictTerminatingPrediction(cls, mode, configs): - # Configs in rule stop states indicate reaching the end of the decision - # rule (local context) or end of start rule (full context). If all - # configs meet this condition, then none of the configurations is able - # to match additional input so we terminate prediction. - # - if cls.allConfigsInRuleStopStates(configs): - return True - - # pure SLL mode parsing - if mode == PredictionMode.SLL: - # Don't bother with combining configs from different semantic - # contexts if we can fail over to full LL; costs more time - # since we'll often fail over anyway. - if configs.hasSemanticContext: - # dup configs, tossing out semantic predicates - dup = ATNConfigSet() - for c in configs: - c = ATNConfig(config=c, semantic=SemanticContext.NONE) - dup.add(c) - configs = dup - # now we have combined contexts for configs with dissimilar preds - - # pure SLL or combined SLL+LL mode parsing - altsets = cls.getConflictingAltSubsets(configs) - return cls.hasConflictingAltSet(altsets) and not cls.hasStateAssociatedWithOneAlt(configs) - - # Checks if any configuration in {@code configs} is in a - # {@link RuleStopState}. Configurations meeting this condition have reached - # the end of the decision rule (local context) or end of start rule (full - # context). - # - # @param configs the configuration set to test - # @return {@code true} if any configuration in {@code configs} is in a - # {@link RuleStopState}, otherwise {@code false} - @classmethod - def hasConfigInRuleStopState(cls, configs): - return any(isinstance(cfg.state, RuleStopState) for cfg in configs) - - # Checks if all configurations in {@code configs} are in a - # {@link RuleStopState}. Configurations meeting this condition have reached - # the end of the decision rule (local context) or end of start rule (full - # context). - # - # @param configs the configuration set to test - # @return {@code true} if all configurations in {@code configs} are in a - # {@link RuleStopState}, otherwise {@code false} - @classmethod - def allConfigsInRuleStopStates(cls, configs): - return all(isinstance(cfg.state, RuleStopState) for cfg in configs) - - # - # Full LL prediction termination. - # - #

      Can we stop looking ahead during ATN simulation or is there some - # uncertainty as to which alternative we will ultimately pick, after - # consuming more input? Even if there are partial conflicts, we might know - # that everything is going to resolve to the same minimum alternative. That - # means we can stop since no more lookahead will change that fact. On the - # other hand, there might be multiple conflicts that resolve to different - # minimums. That means we need more look ahead to decide which of those - # alternatives we should predict.

      - # - #

      The basic idea is to split the set of configurations {@code C}, into - # conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with - # non-conflicting configurations. Two configurations conflict if they have - # identical {@link ATNConfig#state} and {@link ATNConfig#context} values - # but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)} - # and {@code (s, j, ctx, _)} for {@code i!=j}.

      - # - #

      Reduce these configuration subsets to the set of possible alternatives. - # You can compute the alternative subsets in one pass as follows:

      - # - #

      {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in - # {@code C} holding {@code s} and {@code ctx} fixed.

      - # - #

      Or in pseudo-code, for each configuration {@code c} in {@code C}:

      - # - #
      -    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
      -    # alt and not pred
      -    # 
      - # - #

      The values in {@code map} are the set of {@code A_s,ctx} sets.

      - # - #

      If {@code |A_s,ctx|=1} then there is no conflict associated with - # {@code s} and {@code ctx}.

      - # - #

      Reduce the subsets to singletons by choosing a minimum of each subset. If - # the union of these alternative subsets is a singleton, then no amount of - # more lookahead will help us. We will always pick that alternative. If, - # however, there is more than one alternative, then we are uncertain which - # alternative to predict and must continue looking for resolution. We may - # or may not discover an ambiguity in the future, even if there are no - # conflicting subsets this round.

      - # - #

      The biggest sin is to terminate early because it means we've made a - # decision but were uncertain as to the eventual outcome. We haven't used - # enough lookahead. On the other hand, announcing a conflict too late is no - # big deal; you will still have the conflict. It's just inefficient. It - # might even look until the end of file.

      - # - #

      No special consideration for semantic predicates is required because - # predicates are evaluated on-the-fly for full LL prediction, ensuring that - # no configuration contains a semantic context during the termination - # check.

      - # - #

      CONFLICTING CONFIGS

      - # - #

      Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict - # when {@code i!=j} but {@code x=x'}. Because we merge all - # {@code (s, i, _)} configurations together, that means that there are at - # most {@code n} configurations associated with state {@code s} for - # {@code n} possible alternatives in the decision. The merged stacks - # complicate the comparison of configuration contexts {@code x} and - # {@code x'}. Sam checks to see if one is a subset of the other by calling - # merge and checking to see if the merged result is either {@code x} or - # {@code x'}. If the {@code x} associated with lowest alternative {@code i} - # is the superset, then {@code i} is the only possible prediction since the - # others resolve to {@code min(i)} as well. However, if {@code x} is - # associated with {@code j>i} then at least one stack configuration for - # {@code j} is not in conflict with alternative {@code i}. The algorithm - # should keep going, looking for more lookahead due to the uncertainty.

      - # - #

      For simplicity, I'm doing a equality check between {@code x} and - # {@code x'} that lets the algorithm continue to consume lookahead longer - # than necessary. The reason I like the equality is of course the - # simplicity but also because that is the test you need to detect the - # alternatives that are actually in conflict.

      - # - #

      CONTINUE/STOP RULE

      - # - #

      Continue if union of resolved alternative sets from non-conflicting and - # conflicting alternative subsets has more than one alternative. We are - # uncertain about which alternative to predict.

      - # - #

      The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which - # alternatives are still in the running for the amount of input we've - # consumed at this point. The conflicting sets let us to strip away - # configurations that won't lead to more states because we resolve - # conflicts to the configuration with a minimum alternate for the - # conflicting set.

      - # - #

      CASES

      - # - #
        - # - #
      • no conflicts and more than 1 alternative in set => continue
      • - # - #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, - # {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set - # {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = - # {@code {1,3}} => continue - #
      • - # - #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, - # {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set - # {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = - # {@code {1}} => stop and predict 1
      • - # - #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, - # {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U - # {@code {1}} = {@code {1}} => stop and predict 1, can announce - # ambiguity {@code {1,2}}
      • - # - #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, - # {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U - # {@code {2}} = {@code {1,2}} => continue
      • - # - #
      • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, - # {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U - # {@code {3}} = {@code {1,3}} => continue
      • - # - #
      - # - #

      EXACT AMBIGUITY DETECTION

      - # - #

      If all states report the same conflicting set of alternatives, then we - # know we have the exact ambiguity set.

      - # - #

      |A_i|>1 and - # A_i = A_j for all i, j.

      - # - #

      In other words, we continue examining lookahead until all {@code A_i} - # have more than one alternative and all {@code A_i} are the same. If - # {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate - # because the resolved set is {@code {1}}. To determine what the real - # ambiguity is, we have to know whether the ambiguity is between one and - # two or one and three so we keep going. We can only stop prediction when - # we need exact ambiguity detection when the sets look like - # {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...

      - # - @classmethod - def resolvesToJustOneViableAlt(cls, altsets): - return cls.getSingleViableAlt(altsets) - - # - # Determines if every alternative subset in {@code altsets} contains more - # than one alternative. - # - # @param altsets a collection of alternative subsets - # @return {@code true} if every {@link BitSet} in {@code altsets} has - # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} - # - @classmethod - def allSubsetsConflict(cls, altsets): - return not cls.hasNonConflictingAltSet(altsets) - - # - # Determines if any single alternative subset in {@code altsets} contains - # exactly one alternative. - # - # @param altsets a collection of alternative subsets - # @return {@code true} if {@code altsets} contains a {@link BitSet} with - # {@link BitSet#cardinality cardinality} 1, otherwise {@code false} - # - @classmethod - def hasNonConflictingAltSet(cls, altsets): - return any(len(alts) == 1 for alts in altsets) - - # - # Determines if any single alternative subset in {@code altsets} contains - # more than one alternative. - # - # @param altsets a collection of alternative subsets - # @return {@code true} if {@code altsets} contains a {@link BitSet} with - # {@link BitSet#cardinality cardinality} > 1, otherwise {@code false} - # - @classmethod - def hasConflictingAltSet(cls, altsets): - return any(len(alts) > 1 for alts in altsets) - - # - # Determines if every alternative subset in {@code altsets} is equivalent. - # - # @param altsets a collection of alternative subsets - # @return {@code true} if every member of {@code altsets} is equal to the - # others, otherwise {@code false} - # - @classmethod - def allSubsetsEqual(cls, altsets): - if not altsets: - return True - return all(alts == altsets[0] for alts in altsets[1:]) - - # - # Returns the unique alternative predicted by all alternative subsets in - # {@code altsets}. If no such alternative exists, this method returns - # {@link ATN#INVALID_ALT_NUMBER}. - # - # @param altsets a collection of alternative subsets - # - @classmethod - def getUniqueAlt(cls, altsets): - all = cls.getAlts(altsets) - if len(all)==1: - return all.pop() - return ATN.INVALID_ALT_NUMBER - - # Gets the complete set of represented alternatives for a collection of - # alternative subsets. This method returns the union of each {@link BitSet} - # in {@code altsets}. - # - # @param altsets a collection of alternative subsets - # @return the set of represented alternatives in {@code altsets} - # - @classmethod - def getAlts(cls, altsets): - return set.union(*altsets) - - # - # This function gets the conflicting alt subsets from a configuration set. - # For each configuration {@code c} in {@code configs}: - # - #
      -    # map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
      -    # alt and not pred
      -    # 
      - # - @classmethod - def getConflictingAltSubsets(cls, configs): - configToAlts = dict() - for c in configs: - h = hash((c.state.stateNumber, c.context)) - alts = configToAlts.get(h, None) - if alts is None: - alts = set() - configToAlts[h] = alts - alts.add(c.alt) - return configToAlts.values() - - # - # Get a map from state to alt subset from a configuration set. For each - # configuration {@code c} in {@code configs}: - # - #
      -    # map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
      -    # 
      - # - @classmethod - def getStateToAltMap(cls, configs): - m = dict() - for c in configs: - alts = m.get(c.state, None) - if alts is None: - alts = set() - m[c.state] = alts - alts.add(c.alt) - return m - - @classmethod - def hasStateAssociatedWithOneAlt(cls, configs): - return any(len(alts) == 1 for alts in cls.getStateToAltMap(configs).values()) - - @classmethod - def getSingleViableAlt(cls, altsets): - viableAlts = set() - for alts in altsets: - minAlt = min(alts) - viableAlts.add(minAlt); - if len(viableAlts)>1 : # more than 1 viable alt - return ATN.INVALID_ALT_NUMBER - return min(viableAlts) diff --git a/runtime/Python2/src/antlr4/atn/SemanticContext.py b/runtime/Python2/src/antlr4/atn/SemanticContext.py deleted file mode 100644 index 17f0a9d67b..0000000000 --- a/runtime/Python2/src/antlr4/atn/SemanticContext.py +++ /dev/null @@ -1,328 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# A tree structure used to record the semantic context in which -# an ATN configuration is valid. It's either a single predicate, -# a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}. -# -#

      I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of -# {@link SemanticContext} within the scope of this outer class.

      -# -from io import StringIO - - -class SemanticContext(object): - # - # The default {@link SemanticContext}, which is semantically equivalent to - # a predicate of the form {@code {true}?}. - # - NONE = None - - # - # For context independent predicates, we evaluate them without a local - # context (i.e., null context). That way, we can evaluate them without - # having to create proper rule-specific context during prediction (as - # opposed to the parser, which creates them naturally). In a practical - # sense, this avoids a cast exception from RuleContext to myruleContext. - # - #

      For context dependent predicates, we must pass in a local context so that - # references such as $arg evaluate properly as _localctx.arg. We only - # capture context dependent predicates in the context in which we begin - # prediction, so we passed in the outer context here in case of context - # dependent predicate evaluation.

      - # - def eval(self, parser, outerContext): - pass - - # - # Evaluate the precedence predicates for the context and reduce the result. - # - # @param parser The parser instance. - # @param outerContext The current parser context object. - # @return The simplified semantic context after precedence predicates are - # evaluated, which will be one of the following values. - #
        - #
      • {@link #NONE}: if the predicate simplifies to {@code true} after - # precedence predicates are evaluated.
      • - #
      • {@code null}: if the predicate simplifies to {@code false} after - # precedence predicates are evaluated.
      • - #
      • {@code this}: if the semantic context is not changed as a result of - # precedence predicate evaluation.
      • - #
      • A non-{@code null} {@link SemanticContext}: the new simplified - # semantic context after precedence predicates are evaluated.
      • - #
      - # - def evalPrecedence(self, parser, outerContext): - return self - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return unicode(super(SemanticContext, self)) - - -def andContext(a, b): - if a is None or a is SemanticContext.NONE: - return b - if b is None or b is SemanticContext.NONE: - return a - result = AND(a, b) - if len(result.opnds) == 1: - return result.opnds[0] - else: - return result - -def orContext(a, b): - if a is None: - return b - if b is None: - return a - if a is SemanticContext.NONE or b is SemanticContext.NONE: - return SemanticContext.NONE - result = OR(a, b) - if len(result.opnds) == 1: - return result.opnds[0] - else: - return result - -def filterPrecedencePredicates(collection): - return [context for context in collection if isinstance(context, PrecedencePredicate)] - -class EmptySemanticContext(SemanticContext): - pass - -class Predicate(SemanticContext): - - def __init__(self, ruleIndex=-1, predIndex=-1, isCtxDependent=False): - self.ruleIndex = ruleIndex - self.predIndex = predIndex - self.isCtxDependent = isCtxDependent # e.g., $i ref in pred - - def eval(self, parser, outerContext): - localctx = outerContext if self.isCtxDependent else None - return parser.sempred(localctx, self.ruleIndex, self.predIndex) - - def __hash__(self): - return hash((self.ruleIndex, self.predIndex, self.isCtxDependent)) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, Predicate): - return False - return self.ruleIndex == other.ruleIndex and \ - self.predIndex == other.predIndex and \ - self.isCtxDependent == other.isCtxDependent - - def __unicode__(self): - return u"{" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) + u"}?" - - -class PrecedencePredicate(SemanticContext): - - def __init__(self, precedence=0): - self.precedence = precedence - - def eval(self, parser, outerContext): - return parser.precpred(outerContext, self.precedence) - - def evalPrecedence(self, parser, outerContext): - if parser.precpred(outerContext, self.precedence): - return SemanticContext.NONE - else: - return None - - def __cmp__(self, other): - return self.precedence - other.precedence - - def __hash__(self): - return 31 - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, PrecedencePredicate): - return False - else: - return self.precedence == other.precedence - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return u"{" + unicode(self.precedence) + u">=prec}?" - - -# A semantic context which is true whenever none of the contained contexts -# is false. -# -class AND(SemanticContext): - - def __init__(self, a, b): - operands = set() - if isinstance( a, AND): - operands.update(a.opnds) - else: - operands.add(a) - if isinstance( b, AND): - operands.update(b.opnds) - else: - operands.add(b) - - precedencePredicates = filterPrecedencePredicates(operands) - if len(precedencePredicates)>0: - # interested in the transition with the lowest precedence - reduced = min(precedencePredicates) - operands.add(reduced) - - self.opnds = list(operands) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, AND): - return False - else: - return self.opnds == other.opnds - - def __hash__(self): - h = 0 - for o in self.opnds: - h = hash((h, o)) - return hash((h, "AND")) - - # - # {@inheritDoc} - # - #

      - # The evaluation of predicates by this context is short-circuiting, but - # unordered.

      - # - def eval(self, parser, outerContext): - return all(opnd.eval(parser, outerContext) for opnd in self.opnds) - - def evalPrecedence(self, parser, outerContext): - differs = False - operands = [] - for context in self.opnds: - evaluated = context.evalPrecedence(parser, outerContext) - differs |= evaluated is not context - if evaluated is None: - # The AND context is false if any element is false - return None - elif evaluated is not SemanticContext.NONE: - # Reduce the result by skipping true elements - operands.append(evaluated) - - if not differs: - return self - - if len(operands)==0: - # all elements were true, so the AND context is true - return SemanticContext.NONE - - result = None - for o in operands: - result = o if result is None else andContext(result, o) - - return result - - def __unicode__(self): - with StringIO() as buf: - first = True - for o in self.opnds: - if not first: - buf.write(u"&&") - buf.write(unicode(o)) - first = False - return buf.getvalue() - -# -# A semantic context which is true whenever at least one of the contained -# contexts is true. -# -class OR (SemanticContext): - - def __init__(self, a, b): - operands = set() - if isinstance( a, OR): - operands.update(a.opnds) - else: - operands.add(a) - if isinstance( b, OR): - operands.update(b.opnds) - else: - operands.add(b) - - precedencePredicates = filterPrecedencePredicates(operands) - if len(precedencePredicates)>0: - # interested in the transition with the highest precedence - s = sorted(precedencePredicates) - reduced = s[-1] - operands.add(reduced) - - self.opnds = list(operands) - - def __eq__(self, other): - if self is other: - return True - elif not isinstance(other, OR): - return False - else: - return self.opnds == other.opnds - - def __hash__(self): - h = 0 - for o in self.opnds: - h = hash((h, o)) - return hash((h, "OR")) - - #

      - # The evaluation of predicates by this context is short-circuiting, but - # unordered.

      - # - def eval(self, parser, outerContext): - return any(opnd.eval(parser, outerContext) for opnd in self.opnds) - - def evalPrecedence(self, parser, outerContext): - differs = False - operands = [] - for context in self.opnds: - evaluated = context.evalPrecedence(parser, outerContext) - differs |= evaluated is not context - if evaluated is SemanticContext.NONE: - # The OR context is true if any element is true - return SemanticContext.NONE - elif evaluated is not None: - # Reduce the result by skipping false elements - operands.append(evaluated) - - if not differs: - return self - - if len(operands)==0: - # all elements were false, so the OR context is false - return None - - result = None - for o in operands: - result = o if result is None else orContext(result, o) - - return result - - def __unicode__(self): - with StringIO() as buf: - first = True - for o in self.opnds: - if not first: - buf.write(u"||") - buf.write(unicode(o)) - first = False - return buf.getvalue() - - -SemanticContext.NONE = EmptySemanticContext() diff --git a/runtime/Python2/src/antlr4/atn/Transition.py b/runtime/Python2/src/antlr4/atn/Transition.py deleted file mode 100644 index a5f3c8b3f9..0000000000 --- a/runtime/Python2/src/antlr4/atn/Transition.py +++ /dev/null @@ -1,252 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# An ATN transition between any two ATN states. Subclasses define -# atom, set, epsilon, action, predicate, rule transitions. -# -#

      This is a one way link. It emanates from a state (usually via a list of -# transitions) and has a target state.

      -# -#

      Since we never have to change the ATN transitions once we construct it, -# we can fix these transitions as specific classes. The DFA transitions -# on the other hand need to update the labels as it adds transitions to -# the states. We'll use the term Edge for the DFA to distinguish them from -# ATN transitions.

      -# -from __builtin__ import unicode -from antlr4.IntervalSet import IntervalSet, Interval -from antlr4.Token import Token -from antlr4.atn.SemanticContext import Predicate, PrecedencePredicate - -class Transition (object): - # constants for serialization - EPSILON = 1 - RANGE = 2 - RULE = 3 - PREDICATE = 4 # e.g., {isType(input.LT(1))}? - ATOM = 5 - ACTION = 6 - SET = 7 # ~(A|B) or ~atom, wildcard, which convert to next 2 - NOT_SET = 8 - WILDCARD = 9 - PRECEDENCE = 10 - - serializationNames = [ - u"INVALID", - u"EPSILON", - u"RANGE", - u"RULE", - u"PREDICATE", - u"ATOM", - u"ACTION", - u"SET", - u"NOT_SET", - u"WILDCARD", - u"PRECEDENCE" - ] - - serializationTypes = dict() - - def __init__(self, target): - # The target of this transition. - if target is None: - raise Exception("target cannot be null.") - self.target = target - # Are we epsilon, action, sempred? - self.isEpsilon = False - self.label = None - - def __str__(self): - return unicode(self) - - -# TODO: make all transitions sets? no, should remove set edges -class AtomTransition(Transition): - - def __init__(self, target, label): - super(AtomTransition, self).__init__(target) - self.label_ = label # The token type or character value; or, signifies special label. - self.label = self.makeLabel() - self.serializationType = self.ATOM - - def makeLabel(self): - s = IntervalSet() - s.addOne(self.label_) - return s - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return self.label_ == symbol - - def __unicode__(self): - return unicode(self.label_) - -class RuleTransition(Transition): - - def __init__(self, ruleStart, ruleIndex, precedence, followState): - super(RuleTransition, self).__init__(ruleStart) - self.ruleIndex = ruleIndex # ptr to the rule definition object for this rule ref - self.precedence = precedence - self.followState = followState # what node to begin computations following ref to rule - self.serializationType = self.RULE - self.isEpsilon = True - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return False - - -class EpsilonTransition(Transition): - - def __init__(self, target, outermostPrecedenceReturn=-1): - super(EpsilonTransition, self).__init__(target) - self.serializationType = self.EPSILON - self.isEpsilon = True - self.outermostPrecedenceReturn = outermostPrecedenceReturn - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return False - - def __unicode__(self): - return "epsilon" - -class RangeTransition(Transition): - - def __init__(self, target, start, stop): - super(RangeTransition, self).__init__(target) - self.serializationType = self.RANGE - self.start = start - self.stop = stop - self.label = self.makeLabel() - - def makeLabel(self): - s = IntervalSet() - s.addRange(Interval(self.start, self.stop + 1)) - return s - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return symbol >= self.start and symbol <= self.stop - - def __unicode__(self): - return "'" + chr(self.start) + "'..'" + chr(self.stop) + "'" - -class AbstractPredicateTransition(Transition): - - def __init__(self, target): - super(AbstractPredicateTransition, self).__init__(target) - - -class PredicateTransition(AbstractPredicateTransition): - - def __init__(self, target, ruleIndex, predIndex, isCtxDependent): - super(PredicateTransition, self).__init__(target) - self.serializationType = self.PREDICATE - self.ruleIndex = ruleIndex - self.predIndex = predIndex - self.isCtxDependent = isCtxDependent # e.g., $i ref in pred - self.isEpsilon = True - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return False - - def getPredicate(self): - return Predicate(self.ruleIndex, self.predIndex, self.isCtxDependent) - - def __unicode__(self): - return u"pred_" + unicode(self.ruleIndex) + u":" + unicode(self.predIndex) - -class ActionTransition(Transition): - - def __init__(self, target, ruleIndex, actionIndex=-1, isCtxDependent=False): - super(ActionTransition, self).__init__(target) - self.serializationType = self.ACTION - self.ruleIndex = ruleIndex - self.actionIndex = actionIndex - self.isCtxDependent = isCtxDependent # e.g., $i ref in pred - self.isEpsilon = True - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return False - - def __unicode__(self): - return u"action_" + unicode(self.ruleIndex) + u":" + unicode(self.actionIndex) - -# A transition containing a set of values. -class SetTransition(Transition): - - def __init__(self, target, set): - super(SetTransition, self).__init__(target) - self.serializationType = self.SET - if set is not None: - self.label = set - else: - self.label = IntervalSet() - self.label.addRange(Interval(Token.INVALID_TYPE, Token.INVALID_TYPE + 1)) - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return symbol in self.label - - def __unicode__(self): - return unicode(self.label) - -class NotSetTransition(SetTransition): - - def __init__(self, target, set): - super(NotSetTransition, self).__init__(target, set) - self.serializationType = self.NOT_SET - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return symbol >= minVocabSymbol \ - and symbol <= maxVocabSymbol \ - and not super(type(self), self).matches(symbol, minVocabSymbol, maxVocabSymbol) - - def __unicode__(self): - return u'~' + super(type(self), self).__unicode__() - - -class WildcardTransition(Transition): - - def __init__(self, target): - super(WildcardTransition, self).__init__(target) - self.serializationType = self.WILDCARD - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return symbol >= minVocabSymbol and symbol <= maxVocabSymbol - - def __unicode__(self): - return u"." - - -class PrecedencePredicateTransition(AbstractPredicateTransition): - - def __init__(self, target, precedence): - super(PrecedencePredicateTransition, self).__init__(target) - self.serializationType = self.PRECEDENCE - self.precedence = precedence - self.isEpsilon = True - - def matches( self, symbol, minVocabSymbol, maxVocabSymbol): - return False - - - def getPredicate(self): - return PrecedencePredicate(self.precedence) - - def __unicode__(self): - return self.precedence + " >= _p" - - -Transition.serializationTypes = { - EpsilonTransition: Transition.EPSILON, - RangeTransition: Transition.RANGE, - RuleTransition: Transition.RULE, - PredicateTransition: Transition.PREDICATE, - AtomTransition: Transition.ATOM, - ActionTransition: Transition.ACTION, - SetTransition: Transition.SET, - NotSetTransition: Transition.NOT_SET, - WildcardTransition: Transition.WILDCARD, - PrecedencePredicateTransition: Transition.PRECEDENCE - } - diff --git a/runtime/Python2/src/antlr4/atn/__init__.py b/runtime/Python2/src/antlr4/atn/__init__.py deleted file mode 100644 index 216c000dc5..0000000000 --- a/runtime/Python2/src/antlr4/atn/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/dfa/DFA.py b/runtime/Python2/src/antlr4/dfa/DFA.py deleted file mode 100644 index 6909af607b..0000000000 --- a/runtime/Python2/src/antlr4/dfa/DFA.py +++ /dev/null @@ -1,135 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -from antlr4.atn.ATNState import StarLoopEntryState - -from antlr4.atn.ATNConfigSet import ATNConfigSet -from antlr4.dfa.DFAState import DFAState -from antlr4.error.Errors import IllegalStateException - - -class DFA(object): - - def __init__(self, atnStartState, decision=0): - # From which ATN state did we create this DFA? - self.atnStartState = atnStartState - self.decision = decision - # A set of all DFA states. Use {@link Map} so we can get old state back - # ({@link Set} only allows you to see if it's there). - self._states = dict() - self.s0 = None - # {@code true} if this DFA is for a precedence decision; otherwise, - # {@code false}. This is the backing field for {@link #isPrecedenceDfa}, - # {@link #setPrecedenceDfa}. - self.precedenceDfa = False - - if isinstance(atnStartState, StarLoopEntryState): - if atnStartState.isPrecedenceDecision: - self.precedenceDfa = True - precedenceState = DFAState(configs=ATNConfigSet()) - precedenceState.edges = [] - precedenceState.isAcceptState = False - precedenceState.requiresFullContext = False - self.s0 = precedenceState - - - # Get the start state for a specific precedence value. - # - # @param precedence The current precedence. - # @return The start state corresponding to the specified precedence, or - # {@code null} if no start state exists for the specified precedence. - # - # @throws IllegalStateException if this is not a precedence DFA. - # @see #isPrecedenceDfa() - - def getPrecedenceStartState(self, precedence): - if not self.precedenceDfa: - raise IllegalStateException("Only precedence DFAs may contain a precedence start state.") - - # s0.edges is never null for a precedence DFA - if precedence < 0 or precedence >= len(self.s0.edges): - return None - return self.s0.edges[precedence] - - # Set the start state for a specific precedence value. - # - # @param precedence The current precedence. - # @param startState The start state corresponding to the specified - # precedence. - # - # @throws IllegalStateException if this is not a precedence DFA. - # @see #isPrecedenceDfa() - # - def setPrecedenceStartState(self, precedence, startState): - if not self.precedenceDfa: - raise IllegalStateException("Only precedence DFAs may contain a precedence start state.") - - if precedence < 0: - return - - # synchronization on s0 here is ok. when the DFA is turned into a - # precedence DFA, s0 will be initialized once and not updated again - # s0.edges is never null for a precedence DFA - if precedence >= len(self.s0.edges): - ext = [None] * (precedence + 1 - len(self.s0.edges)) - self.s0.edges.extend(ext) - self.s0.edges[precedence] = startState - # - # Sets whether this is a precedence DFA. If the specified value differs - # from the current DFA configuration, the following actions are taken; - # otherwise no changes are made to the current DFA. - # - #
        - #
      • The {@link #states} map is cleared
      • - #
      • If {@code precedenceDfa} is {@code false}, the initial state - # {@link #s0} is set to {@code null}; otherwise, it is initialized to a new - # {@link DFAState} with an empty outgoing {@link DFAState#edges} array to - # store the start states for individual precedence values.
      • - #
      • The {@link #precedenceDfa} field is updated
      • - #
      - # - # @param precedenceDfa {@code true} if this is a precedence DFA; otherwise, - # {@code false} - - def setPrecedenceDfa(self, precedenceDfa): - if self.precedenceDfa != precedenceDfa: - self._states = dict() - if precedenceDfa: - precedenceState = DFAState(configs=ATNConfigSet()) - precedenceState.edges = [] - precedenceState.isAcceptState = False - precedenceState.requiresFullContext = False - self.s0 = precedenceState - else: - self.s0 = None - self.precedenceDfa = precedenceDfa - - @property - def states(self): - return self._states - - # Return a list of all states in this DFA, ordered by state number. - def sortedStates(self): - return sorted(self._states.keys(), key=lambda state: state.stateNumber) - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return self.toString(None) - - def toString(self, literalNames=None, symbolicNames=None): - if self.s0 is None: - return "" - from antlr4.dfa.DFASerializer import DFASerializer - serializer = DFASerializer(self, literalNames, symbolicNames) - return unicode(serializer) - - def toLexerString(self): - if self.s0 is None: - return "" - from antlr4.dfa.DFASerializer import LexerDFASerializer - serializer = LexerDFASerializer(self) - return unicode(serializer) - diff --git a/runtime/Python2/src/antlr4/dfa/DFASerializer.py b/runtime/Python2/src/antlr4/dfa/DFASerializer.py deleted file mode 100644 index 03aecdca98..0000000000 --- a/runtime/Python2/src/antlr4/dfa/DFASerializer.py +++ /dev/null @@ -1,74 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# A DFA walker that knows how to dump them to serialized strings.#/ -from io import StringIO -from antlr4.Utils import str_list - - -class DFASerializer(object): - - def __init__(self, dfa, literalNames=None, symbolicNames=None): - self.dfa = dfa - self.literalNames = literalNames - self.symbolicNames = symbolicNames - - def __str__(self): - return unicode(self) - - def __unicode__(self): - if self.dfa.s0 is None: - return None - with StringIO() as buf: - for s in self.dfa.sortedStates(): - n = 0 - if s.edges is not None: - n = len(s.edges) - for i in range(0, n): - t = s.edges[i] - if t is not None and t.stateNumber != 0x7FFFFFFF: - buf.write(self.getStateString(s)) - label = self.getEdgeLabel(i) - buf.write(u"-") - buf.write(label) - buf.write(u"->") - buf.write(self.getStateString(t)) - buf.write(u'\n') - output = buf.getvalue() - if len(output)==0: - return None - else: - return output - - def getEdgeLabel(self, i): - if i==0: - return u"EOF" - if self.literalNames is not None and i<=len(self.literalNames): - return self.literalNames[i-1] - elif self.symbolicNames is not None and i<=len(self.symbolicNames): - return self.symbolicNames[i-1] - else: - return unicode(i-1) - - def getStateString(self, s): - n = s.stateNumber - baseStateStr = ( u":" if s.isAcceptState else u"") + u"s" + unicode(n) + \ - ( u"^" if s.requiresFullContext else u"") - if s.isAcceptState: - if s.predicates is not None: - return baseStateStr + u"=>" + str_list(s.predicates) - else: - return baseStateStr + u"=>" + unicode(s.prediction) - else: - return baseStateStr - -class LexerDFASerializer(DFASerializer): - - def __init__(self, dfa): - super(LexerDFASerializer, self).__init__(dfa, None) - - def getEdgeLabel(self, i): - return u"'" + unichr(i) + u"'" diff --git a/runtime/Python2/src/antlr4/dfa/DFAState.py b/runtime/Python2/src/antlr4/dfa/DFAState.py deleted file mode 100644 index fb655249c5..0000000000 --- a/runtime/Python2/src/antlr4/dfa/DFAState.py +++ /dev/null @@ -1,124 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - -# Map a predicate to a predicted alternative.#/ -from io import StringIO -from antlr4.atn.ATNConfigSet import ATNConfigSet - -class PredPrediction(object): - def __init__(self, pred, alt): - self.alt = alt - self.pred = pred - - def __str__(self): - return unicode(self) - - def __unicode__(self): - return u"(" + unicode(self.pred) + u", " + unicode(self.alt) + u")" - -# A DFA state represents a set of possible ATN configurations. -# As Aho, Sethi, Ullman p. 117 says "The DFA uses its state -# to keep track of all possible states the ATN can be in after -# reading each input symbol. That is to say, after reading -# input a1a2..an, the DFA is in a state that represents the -# subset T of the states of the ATN that are reachable from the -# ATN's start state along some path labeled a1a2..an." -# In conventional NFA→DFA conversion, therefore, the subset T -# would be a bitset representing the set of states the -# ATN could be in. We need to track the alt predicted by each -# state as well, however. More importantly, we need to maintain -# a stack of states, tracking the closure operations as they -# jump from rule to rule, emulating rule invocations (method calls). -# I have to add a stack to simulate the proper lookahead sequences for -# the underlying LL grammar from which the ATN was derived. -# -#

      I use a set of ATNConfig objects not simple states. An ATNConfig -# is both a state (ala normal conversion) and a RuleContext describing -# the chain of rules (if any) followed to arrive at that state.

      -# -#

      A DFA state may have multiple references to a particular state, -# but with different ATN contexts (with same or different alts) -# meaning that state was reached via a different set of rule invocations.

      -#/ -class DFAState(object): - - def __init__(self, stateNumber=-1, configs=ATNConfigSet()): - self.stateNumber = stateNumber - self.configs = configs - # {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) - # {@link Token#EOF} maps to {@code edges[0]}. - self.edges = None - self.isAcceptState = False - # if accept state, what ttype do we match or alt do we predict? - # This is set to {@link ATN#INVALID_ALT_NUMBER} when {@link #predicates}{@code !=null} or - # {@link #requiresFullContext}. - self.prediction = 0 - self.lexerActionExecutor = None - # Indicates that this state was created during SLL prediction that - # discovered a conflict between the configurations in the state. Future - # {@link ParserATNSimulator#execATN} invocations immediately jumped doing - # full context prediction if this field is true. - self.requiresFullContext = False - # During SLL parsing, this is a list of predicates associated with the - # ATN configurations of the DFA state. When we have predicates, - # {@link #requiresFullContext} is {@code false} since full context prediction evaluates predicates - # on-the-fly. If this is not null, then {@link #prediction} is - # {@link ATN#INVALID_ALT_NUMBER}. - # - #

      We only use these for non-{@link #requiresFullContext} but conflicting states. That - # means we know from the context (it's $ or we don't dip into outer - # context) that it's an ambiguity not a conflict.

      - # - #

      This list is computed by {@link ParserATNSimulator#predicateDFAState}.

      - self.predicates = None - - - - # Get the set of all alts mentioned by all ATN configurations in this - # DFA state. - def getAltSet(self): - if self.configs is not None: - return set(cfg.alt for cfg in self.configs) or None - return None - - def __hash__(self): - return hash(self.configs) - - # Two {@link DFAState} instances are equal if their ATN configuration sets - # are the same. This method is used to see if a state already exists. - # - #

      Because the number of alternatives and number of ATN configurations are - # finite, there is a finite number of DFA states that can be processed. - # This is necessary to show that the algorithm terminates.

      - # - #

      Cannot test the DFA state numbers here because in - # {@link ParserATNSimulator#addDFAState} we need to know if any other state - # exists that has this exact set of ATN configurations. The - # {@link #stateNumber} is irrelevant.

      - def __eq__(self, other): - # compare set of ATN configurations in this set with other - if self is other: - return True - elif not isinstance(other, DFAState): - return False - else: - return self.configs==other.configs - - def __str__(self): - return unicode(self) - - def __unicode__(self): - with StringIO() as buf: - buf.write(unicode(self.stateNumber)) - buf.write(u":") - buf.write(unicode(self.configs)) - if self.isAcceptState: - buf.write(u"=>") - if self.predicates is not None: - buf.write(unicode(self.predicates)) - else: - buf.write(unicode(self.prediction)) - return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/dfa/__init__.py b/runtime/Python2/src/antlr4/dfa/__init__.py deleted file mode 100644 index 216c000dc5..0000000000 --- a/runtime/Python2/src/antlr4/dfa/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py b/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py deleted file mode 100644 index 592bce46ed..0000000000 --- a/runtime/Python2/src/antlr4/error/DiagnosticErrorListener.py +++ /dev/null @@ -1,106 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - - -# -# This implementation of {@link ANTLRErrorListener} can be used to identify -# certain potential correctness and performance problems in grammars. "Reports" -# are made by calling {@link Parser#notifyErrorListeners} with the appropriate -# message. -# -#
        -#
      • Ambiguities: These are cases where more than one path through the -# grammar can match the input.
      • -#
      • Weak context sensitivity: These are cases where full-context -# prediction resolved an SLL conflict to a unique alternative which equaled the -# minimum alternative of the SLL conflict.
      • -#
      • Strong (forced) context sensitivity: These are cases where the -# full-context prediction resolved an SLL conflict to a unique alternative, -# and the minimum alternative of the SLL conflict was found to not be -# a truly viable alternative. Two-stage parsing cannot be used for inputs where -# this situation occurs.
      • -#
      - -from io import StringIO -from antlr4.Utils import str_set -from antlr4.error.ErrorListener import ErrorListener - -class DiagnosticErrorListener(ErrorListener): - - def __init__(self, exactOnly=True): - # whether all ambiguities or only exact ambiguities are reported. - self.exactOnly = exactOnly - - def reportAmbiguity(self, recognizer, dfa, startIndex, - stopIndex, exact, ambigAlts, configs): - if self.exactOnly and not exact: - return - - with StringIO() as buf: - buf.write(u"reportAmbiguity d=") - buf.write(self.getDecisionDescription(recognizer, dfa)) - buf.write(u": ambigAlts=") - buf.write(str_set(self.getConflictingAlts(ambigAlts, configs))) - buf.write(u", input='") - buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex)) - buf.write(u"'") - recognizer.notifyErrorListeners(buf.getvalue()) - - - def reportAttemptingFullContext(self, recognizer, dfa, startIndex, - stopIndex, conflictingAlts, configs): - with StringIO() as buf: - buf.write(u"reportAttemptingFullContext d=") - buf.write(self.getDecisionDescription(recognizer, dfa)) - buf.write(u", input='") - buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex)) - buf.write(u"'") - recognizer.notifyErrorListeners(buf.getvalue()) - - def reportContextSensitivity(self, recognizer, dfa, startIndex, - stopIndex, prediction, configs): - with StringIO() as buf: - buf.write(u"reportContextSensitivity d=") - buf.write(self.getDecisionDescription(recognizer, dfa)) - buf.write(u", input='") - buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex)) - buf.write(u"'") - recognizer.notifyErrorListeners(buf.getvalue()) - - def getDecisionDescription(self, recognizer, dfa): - decision = dfa.decision - ruleIndex = dfa.atnStartState.ruleIndex - - ruleNames = recognizer.ruleNames - if ruleIndex < 0 or ruleIndex >= len(ruleNames): - return unicode(decision) - - ruleName = ruleNames[ruleIndex] - if ruleName is None or len(ruleName)==0: - return unicode(decision) - - return unicode(decision) + u" (" + ruleName + u")" - - # - # Computes the set of conflicting or ambiguous alternatives from a - # configuration set, if that information was not already provided by the - # parser. - # - # @param reportedAlts The set of conflicting or ambiguous alternatives, as - # reported by the parser. - # @param configs The conflicting or ambiguous configuration set. - # @return Returns {@code reportedAlts} if it is not {@code null}, otherwise - # returns the set of alternatives represented in {@code configs}. - # - def getConflictingAlts(self, reportedAlts, configs): - if reportedAlts is not None: - return reportedAlts - - result = set() - for config in configs: - result.add(config.alt) - - return result diff --git a/runtime/Python2/src/antlr4/error/ErrorListener.py b/runtime/Python2/src/antlr4/error/ErrorListener.py deleted file mode 100644 index 357512e1ce..0000000000 --- a/runtime/Python2/src/antlr4/error/ErrorListener.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -# Provides an empty default implementation of {@link ANTLRErrorListener}. The -# default implementation of each method does nothing, but can be overridden as -# necessary. - -from __future__ import print_function -import sys - -class ErrorListener(object): - - def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): - pass - - def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs): - pass - - def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs): - pass - - def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs): - pass - -class ConsoleErrorListener(ErrorListener): - # - # Provides a default instance of {@link ConsoleErrorListener}. - # - INSTANCE = None - - # - # {@inheritDoc} - # - #

      - # This implementation prints messages to {@link System#err} containing the - # values of {@code line}, {@code charPositionInLine}, and {@code msg} using - # the following format.

      - # - #
      -    # line line:charPositionInLine msg
      -    # 
      - # - def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): - print("line " + str(line) + ":" + str(column) + " " + msg, file=sys.stderr) - -ConsoleErrorListener.INSTANCE = ConsoleErrorListener() - -class ProxyErrorListener(ErrorListener): - - def __init__(self, delegates): - super(ProxyErrorListener, self).__init__() - if delegates is None: - raise ReferenceError("delegates") - self.delegates = delegates - - def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): - for delegate in self.delegates: - delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e) - - def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs): - for delegate in self.delegates: - delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs) - - def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs): - for delegate in self.delegates: - delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs) - - def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs): - for delegate in self.delegates: - delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs) diff --git a/runtime/Python2/src/antlr4/error/ErrorStrategy.py b/runtime/Python2/src/antlr4/error/ErrorStrategy.py deleted file mode 100644 index f79cc59c65..0000000000 --- a/runtime/Python2/src/antlr4/error/ErrorStrategy.py +++ /dev/null @@ -1,702 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# -from antlr4.IntervalSet import IntervalSet - -from antlr4.Token import Token -from antlr4.atn.ATNState import ATNState -from antlr4.error.Errors import NoViableAltException, InputMismatchException, FailedPredicateException, ParseCancellationException - -class ErrorStrategy(object): - - def reset(self, recognizer): - pass - - def recoverInline(self, recognizer): - pass - - def recover(self, recognizer, e): - pass - - def sync(self, recognizer): - pass - - def inErrorRecoveryMode(self, recognizer): - pass - - def reportError(self, recognizer, e): - pass - - -# This is the default implementation of {@link ANTLRErrorStrategy} used for -# error reporting and recovery in ANTLR parsers. -# -class DefaultErrorStrategy(ErrorStrategy): - - def __init__(self): - super(DefaultErrorStrategy, self).__init__() - # Indicates whether the error strategy is currently "recovering from an - # error". This is used to suppress reporting multiple error messages while - # attempting to recover from a detected syntax error. - # - # @see #inErrorRecoveryMode - # - self.errorRecoveryMode = False - - # The index into the input stream where the last error occurred. - # This is used to prevent infinite loops where an error is found - # but no token is consumed during recovery...another error is found, - # ad nauseum. This is a failsafe mechanism to guarantee that at least - # one token/tree node is consumed for two errors. - # - self.lastErrorIndex = -1 - self.lastErrorStates = None - self.nextTokensContext = None - self.nextTokenState = 0 - - #

      The default implementation simply calls {@link #endErrorCondition} to - # ensure that the handler is not in error recovery mode.

      - def reset(self, recognizer): - self.endErrorCondition(recognizer) - - # - # This method is called to enter error recovery mode when a recognition - # exception is reported. - # - # @param recognizer the parser instance - # - def beginErrorCondition(self, recognizer): - self.errorRecoveryMode = True - - def inErrorRecoveryMode(self, recognizer): - return self.errorRecoveryMode - - # - # This method is called to leave error recovery mode after recovering from - # a recognition exception. - # - # @param recognizer - # - def endErrorCondition(self, recognizer): - self.errorRecoveryMode = False - self.lastErrorStates = None - self.lastErrorIndex = -1 - - # - # {@inheritDoc} - # - #

      The default implementation simply calls {@link #endErrorCondition}.

      - # - def reportMatch(self, recognizer): - self.endErrorCondition(recognizer) - - # - # {@inheritDoc} - # - #

      The default implementation returns immediately if the handler is already - # in error recovery mode. Otherwise, it calls {@link #beginErrorCondition} - # and dispatches the reporting task based on the runtime type of {@code e} - # according to the following table.

      - # - #
        - #
      • {@link NoViableAltException}: Dispatches the call to - # {@link #reportNoViableAlternative}
      • - #
      • {@link InputMismatchException}: Dispatches the call to - # {@link #reportInputMismatch}
      • - #
      • {@link FailedPredicateException}: Dispatches the call to - # {@link #reportFailedPredicate}
      • - #
      • All other types: calls {@link Parser#notifyErrorListeners} to report - # the exception
      • - #
      - # - def reportError(self, recognizer, e): - # if we've already reported an error and have not matched a token - # yet successfully, don't report any errors. - if self.inErrorRecoveryMode(recognizer): - return # don't report spurious errors - self.beginErrorCondition(recognizer) - if isinstance( e, NoViableAltException ): - self.reportNoViableAlternative(recognizer, e) - elif isinstance( e, InputMismatchException ): - self.reportInputMismatch(recognizer, e) - elif isinstance( e, FailedPredicateException ): - self.reportFailedPredicate(recognizer, e) - else: - print("unknown recognition error type: " + type(e).__name__) - recognizer.notifyErrorListeners(e.message, e.getOffendingToken(), e) - - # - # {@inheritDoc} - # - #

      The default implementation resynchronizes the parser by consuming tokens - # until we find one in the resynchronization set--loosely the set of tokens - # that can follow the current rule.

      - # - def recover(self, recognizer, e): - if self.lastErrorIndex==recognizer.getInputStream().index \ - and self.lastErrorStates is not None \ - and recognizer.state in self.lastErrorStates: - # uh oh, another error at same token index and previously-visited - # state in ATN; must be a case where LT(1) is in the recovery - # token set so nothing got consumed. Consume a single token - # at least to prevent an infinite loop; this is a failsafe. - recognizer.consume() - - self.lastErrorIndex = recognizer._input.index - if self.lastErrorStates is None: - self.lastErrorStates = [] - self.lastErrorStates.append(recognizer.state) - followSet = self.getErrorRecoverySet(recognizer) - self.consumeUntil(recognizer, followSet) - - # The default implementation of {@link ANTLRErrorStrategy#sync} makes sure - # that the current lookahead symbol is consistent with what were expecting - # at this point in the ATN. You can call this anytime but ANTLR only - # generates code to check before subrules/loops and each iteration. - # - #

      Implements Jim Idle's magic sync mechanism in closures and optional - # subrules. E.g.,

      - # - #
      -    # a : sync ( stuff sync )* ;
      -    # sync : {consume to what can follow sync} ;
      -    # 
      - # - # At the start of a sub rule upon error, {@link #sync} performs single - # token deletion, if possible. If it can't do that, it bails on the current - # rule and uses the default error recovery, which consumes until the - # resynchronization set of the current rule. - # - #

      If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block - # with an empty alternative), then the expected set includes what follows - # the subrule.

      - # - #

      During loop iteration, it consumes until it sees a token that can start a - # sub rule or what follows loop. Yes, that is pretty aggressive. We opt to - # stay in the loop as long as possible.

      - # - #

      ORIGINS

      - # - #

      Previous versions of ANTLR did a poor job of their recovery within loops. - # A single mismatch token or missing token would force the parser to bail - # out of the entire rules surrounding the loop. So, for rule

      - # - #
      -    # classDef : 'class' ID '{' member* '}'
      -    # 
      - # - # input with an extra token between members would force the parser to - # consume until it found the next class definition rather than the next - # member definition of the current class. - # - #

      This functionality cost a little bit of effort because the parser has to - # compare token set at the start of the loop and at each iteration. If for - # some reason speed is suffering for you, you can turn off this - # functionality by simply overriding this method as a blank { }.

      - # - def sync(self, recognizer): - # If already recovering, don't try to sync - if self.inErrorRecoveryMode(recognizer): - return - - s = recognizer._interp.atn.states[recognizer.state] - la = recognizer.getTokenStream().LA(1) - # try cheaper subset first; might get lucky. seems to shave a wee bit off - nextTokens = recognizer.atn.nextTokens(s) - if la in nextTokens: - self.nextTokensContext = None - self.nextTokenState = ATNState.INVALID_STATE_NUMBER - return - elif Token.EPSILON in nextTokens: - if self.nextTokensContext is None: - # It's possible the next token won't match information tracked - # by sync is restricted for performance. - self.nextTokensContext = recognizer._ctx - self.nextTokensState = recognizer._stateNumber - return - - if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START, - ATNState.PLUS_BLOCK_START, ATNState.STAR_LOOP_ENTRY]: - # report error and recover if possible - if self.singleTokenDeletion(recognizer)is not None: - return - else: - raise InputMismatchException(recognizer) - - elif s.stateType in [ATNState.PLUS_LOOP_BACK, ATNState.STAR_LOOP_BACK]: - self.reportUnwantedToken(recognizer) - expecting = recognizer.getExpectedTokens() - whatFollowsLoopIterationOrRule = expecting.addSet(self.getErrorRecoverySet(recognizer)) - self.consumeUntil(recognizer, whatFollowsLoopIterationOrRule) - - else: - # do nothing if we can't identify the exact kind of ATN state - pass - - # This is called by {@link #reportError} when the exception is a - # {@link NoViableAltException}. - # - # @see #reportError - # - # @param recognizer the parser instance - # @param e the recognition exception - # - def reportNoViableAlternative(self, recognizer, e): - tokens = recognizer.getTokenStream() - if tokens is not None: - if e.startToken.type==Token.EOF: - input = "" - else: - input = tokens.getText(e.startToken, e.offendingToken) - else: - input = "" - msg = "no viable alternative at input " + self.escapeWSAndQuote(input) - recognizer.notifyErrorListeners(msg, e.offendingToken, e) - - # - # This is called by {@link #reportError} when the exception is an - # {@link InputMismatchException}. - # - # @see #reportError - # - # @param recognizer the parser instance - # @param e the recognition exception - # - def reportInputMismatch(self, recognizer, e): - msg = "mismatched input " + self.getTokenErrorDisplay(e.offendingToken) \ - + " expecting " + e.getExpectedTokens().toString(recognizer.literalNames, recognizer.symbolicNames) - recognizer.notifyErrorListeners(msg, e.offendingToken, e) - - # - # This is called by {@link #reportError} when the exception is a - # {@link FailedPredicateException}. - # - # @see #reportError - # - # @param recognizer the parser instance - # @param e the recognition exception - # - def reportFailedPredicate(self, recognizer, e): - ruleName = recognizer.ruleNames[recognizer._ctx.getRuleIndex()] - msg = "rule " + ruleName + " " + e.message - recognizer.notifyErrorListeners(msg, e.offendingToken, e) - - # This method is called to report a syntax error which requires the removal - # of a token from the input stream. At the time this method is called, the - # erroneous symbol is current {@code LT(1)} symbol and has not yet been - # removed from the input stream. When this method returns, - # {@code recognizer} is in error recovery mode. - # - #

      This method is called when {@link #singleTokenDeletion} identifies - # single-token deletion as a viable recovery strategy for a mismatched - # input error.

      - # - #

      The default implementation simply returns if the handler is already in - # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to - # enter error recovery mode, followed by calling - # {@link Parser#notifyErrorListeners}.

      - # - # @param recognizer the parser instance - # - def reportUnwantedToken(self, recognizer): - if self.inErrorRecoveryMode(recognizer): - return - - self.beginErrorCondition(recognizer) - t = recognizer.getCurrentToken() - tokenName = self.getTokenErrorDisplay(t) - expecting = self.getExpectedTokens(recognizer) - msg = "extraneous input " + tokenName + " expecting " \ - + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) - recognizer.notifyErrorListeners(msg, t, None) - - # This method is called to report a syntax error which requires the - # insertion of a missing token into the input stream. At the time this - # method is called, the missing token has not yet been inserted. When this - # method returns, {@code recognizer} is in error recovery mode. - # - #

      This method is called when {@link #singleTokenInsertion} identifies - # single-token insertion as a viable recovery strategy for a mismatched - # input error.

      - # - #

      The default implementation simply returns if the handler is already in - # error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to - # enter error recovery mode, followed by calling - # {@link Parser#notifyErrorListeners}.

      - # - # @param recognizer the parser instance - # - def reportMissingToken(self, recognizer): - if self.inErrorRecoveryMode(recognizer): - return - self.beginErrorCondition(recognizer) - t = recognizer.getCurrentToken() - expecting = self.getExpectedTokens(recognizer) - msg = "missing " + expecting.toString(recognizer.literalNames, recognizer.symbolicNames) \ - + " at " + self.getTokenErrorDisplay(t) - recognizer.notifyErrorListeners(msg, t, None) - - #

      The default implementation attempts to recover from the mismatched input - # by using single token insertion and deletion as described below. If the - # recovery attempt fails, this method throws an - # {@link InputMismatchException}.

      - # - #

      EXTRA TOKEN (single token deletion)

      - # - #

      {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the - # right token, however, then assume {@code LA(1)} is some extra spurious - # token and delete it. Then consume and return the next token (which was - # the {@code LA(2)} token) as the successful result of the match operation.

      - # - #

      This recovery strategy is implemented by {@link #singleTokenDeletion}.

      - # - #

      MISSING TOKEN (single token insertion)

      - # - #

      If current token (at {@code LA(1)}) is consistent with what could come - # after the expected {@code LA(1)} token, then assume the token is missing - # and use the parser's {@link TokenFactory} to create it on the fly. The - # "insertion" is performed by returning the created token as the successful - # result of the match operation.

      - # - #

      This recovery strategy is implemented by {@link #singleTokenInsertion}.

      - # - #

      EXAMPLE

      - # - #

      For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When - # the parser returns from the nested call to {@code expr}, it will have - # call chain:

      - # - #
      -    # stat → expr → atom
      -    # 
      - # - # and it will be trying to match the {@code ')'} at this point in the - # derivation: - # - #
      -    # => ID '=' '(' INT ')' ('+' atom)* ';'
      -    #                    ^
      -    # 
      - # - # The attempt to match {@code ')'} will fail when it sees {@code ';'} and - # call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} - # is in the set of tokens that can follow the {@code ')'} token reference - # in rule {@code atom}. It can assume that you forgot the {@code ')'}. - # - def recoverInline(self, recognizer): - # SINGLE TOKEN DELETION - matchedSymbol = self.singleTokenDeletion(recognizer) - if matchedSymbol is not None: - # we have deleted the extra token. - # now, move past ttype token as if all were ok - recognizer.consume() - return matchedSymbol - - # SINGLE TOKEN INSERTION - if self.singleTokenInsertion(recognizer): - return self.getMissingSymbol(recognizer) - - # even that didn't work; must throw the exception - raise InputMismatchException(recognizer) - - # - # This method implements the single-token insertion inline error recovery - # strategy. It is called by {@link #recoverInline} if the single-token - # deletion strategy fails to recover from the mismatched input. If this - # method returns {@code true}, {@code recognizer} will be in error recovery - # mode. - # - #

      This method determines whether or not single-token insertion is viable by - # checking if the {@code LA(1)} input symbol could be successfully matched - # if it were instead the {@code LA(2)} symbol. If this method returns - # {@code true}, the caller is responsible for creating and inserting a - # token with the correct type to produce this behavior.

      - # - # @param recognizer the parser instance - # @return {@code true} if single-token insertion is a viable recovery - # strategy for the current mismatched input, otherwise {@code false} - # - def singleTokenInsertion(self, recognizer): - currentSymbolType = recognizer.getTokenStream().LA(1) - # if current token is consistent with what could come after current - # ATN state, then we know we're missing a token; error recovery - # is free to conjure up and insert the missing token - atn = recognizer._interp.atn - currentState = atn.states[recognizer.state] - next = currentState.transitions[0].target - expectingAtLL2 = atn.nextTokens(next, recognizer._ctx) - if currentSymbolType in expectingAtLL2: - self.reportMissingToken(recognizer) - return True - else: - return False - - # This method implements the single-token deletion inline error recovery - # strategy. It is called by {@link #recoverInline} to attempt to recover - # from mismatched input. If this method returns null, the parser and error - # handler state will not have changed. If this method returns non-null, - # {@code recognizer} will not be in error recovery mode since the - # returned token was a successful match. - # - #

      If the single-token deletion is successful, this method calls - # {@link #reportUnwantedToken} to report the error, followed by - # {@link Parser#consume} to actually "delete" the extraneous token. Then, - # before returning {@link #reportMatch} is called to signal a successful - # match.

      - # - # @param recognizer the parser instance - # @return the successfully matched {@link Token} instance if single-token - # deletion successfully recovers from the mismatched input, otherwise - # {@code null} - # - def singleTokenDeletion(self, recognizer): - nextTokenType = recognizer.getTokenStream().LA(2) - expecting = self.getExpectedTokens(recognizer) - if nextTokenType in expecting: - self.reportUnwantedToken(recognizer) - # print("recoverFromMismatchedToken deleting " \ - # + str(recognizer.getTokenStream().LT(1)) \ - # + " since " + str(recognizer.getTokenStream().LT(2)) \ - # + " is what we want", file=sys.stderr) - recognizer.consume() # simply delete extra token - # we want to return the token we're actually matching - matchedSymbol = recognizer.getCurrentToken() - self.reportMatch(recognizer) # we know current token is correct - return matchedSymbol - else: - return None - - # Conjure up a missing token during error recovery. - # - # The recognizer attempts to recover from single missing - # symbols. But, actions might refer to that missing symbol. - # For example, x=ID {f($x);}. The action clearly assumes - # that there has been an identifier matched previously and that - # $x points at that token. If that token is missing, but - # the next token in the stream is what we want we assume that - # this token is missing and we keep going. Because we - # have to return some token to replace the missing token, - # we have to conjure one up. This method gives the user control - # over the tokens returned for missing tokens. Mostly, - # you will want to create something special for identifier - # tokens. For literals such as '{' and ',', the default - # action in the parser or tree parser works. It simply creates - # a CommonToken of the appropriate type. The text will be the token. - # If you change what tokens must be created by the lexer, - # override this method to create the appropriate tokens. - # - def getMissingSymbol(self, recognizer): - currentSymbol = recognizer.getCurrentToken() - expecting = self.getExpectedTokens(recognizer) - expectedTokenType = expecting[0] # get any element - if expectedTokenType==Token.EOF: - tokenText = u"" - else: - name = None - if expectedTokenType < len(recognizer.literalNames): - name = recognizer.literalNames[expectedTokenType] - if name is None and expectedTokenType < len(recognizer.symbolicNames): - name = recognizer.symbolicNames[expectedTokenType] - tokenText = u"" - current = currentSymbol - lookback = recognizer.getTokenStream().LT(-1) - if current.type==Token.EOF and lookback is not None: - current = lookback - return recognizer.getTokenFactory().create(current.source, - expectedTokenType, tokenText, Token.DEFAULT_CHANNEL, - -1, -1, current.line, current.column) - - def getExpectedTokens(self, recognizer): - return recognizer.getExpectedTokens() - - # How should a token be displayed in an error message? The default - # is to display just the text, but during development you might - # want to have a lot of information spit out. Override in that case - # to use t.toString() (which, for CommonToken, dumps everything about - # the token). This is better than forcing you to override a method in - # your token objects because you don't have to go modify your lexer - # so that it creates a new Java type. - # - def getTokenErrorDisplay(self, t): - if t is None: - return u"" - s = t.text - if s is None: - if t.type==Token.EOF: - s = u"" - else: - s = u"<" + unicode(t.type) + u">" - return self.escapeWSAndQuote(s) - - def escapeWSAndQuote(self, s): - s = s.replace(u"\n",u"\\n") - s = s.replace(u"\r",u"\\r") - s = s.replace(u"\t",u"\\t") - return u"'" + s + u"'" - - # Compute the error recovery set for the current rule. During - # rule invocation, the parser pushes the set of tokens that can - # follow that rule reference on the stack; this amounts to - # computing FIRST of what follows the rule reference in the - # enclosing rule. See LinearApproximator.FIRST(). - # This local follow set only includes tokens - # from within the rule; i.e., the FIRST computation done by - # ANTLR stops at the end of a rule. - # - # EXAMPLE - # - # When you find a "no viable alt exception", the input is not - # consistent with any of the alternatives for rule r. The best - # thing to do is to consume tokens until you see something that - # can legally follow a call to r#or* any rule that called r. - # You don't want the exact set of viable next tokens because the - # input might just be missing a token--you might consume the - # rest of the input looking for one of the missing tokens. - # - # Consider grammar: - # - # a : '[' b ']' - # | '(' b ')' - # ; - # b : c '^' INT ; - # c : ID - # | INT - # ; - # - # At each rule invocation, the set of tokens that could follow - # that rule is pushed on a stack. Here are the various - # context-sensitive follow sets: - # - # FOLLOW(b1_in_a) = FIRST(']') = ']' - # FOLLOW(b2_in_a) = FIRST(')') = ')' - # FOLLOW(c_in_b) = FIRST('^') = '^' - # - # Upon erroneous input "[]", the call chain is - # - # a -> b -> c - # - # and, hence, the follow context stack is: - # - # depth follow set start of rule execution - # 0 a (from main()) - # 1 ']' b - # 2 '^' c - # - # Notice that ')' is not included, because b would have to have - # been called from a different context in rule a for ')' to be - # included. - # - # For error recovery, we cannot consider FOLLOW(c) - # (context-sensitive or otherwise). We need the combined set of - # all context-sensitive FOLLOW sets--the set of all tokens that - # could follow any reference in the call chain. We need to - # resync to one of those tokens. Note that FOLLOW(c)='^' and if - # we resync'd to that token, we'd consume until EOF. We need to - # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. - # In this case, for input "[]", LA(1) is ']' and in the set, so we would - # not consume anything. After printing an error, rule c would - # return normally. Rule b would not find the required '^' though. - # At this point, it gets a mismatched token error and throws an - # exception (since LA(1) is not in the viable following token - # set). The rule exception handler tries to recover, but finds - # the same recovery set and doesn't consume anything. Rule b - # exits normally returning to rule a. Now it finds the ']' (and - # with the successful match exits errorRecovery mode). - # - # So, you can see that the parser walks up the call chain looking - # for the token that was a member of the recovery set. - # - # Errors are not generated in errorRecovery mode. - # - # ANTLR's error recovery mechanism is based upon original ideas: - # - # "Algorithms + Data Structures = Programs" by Niklaus Wirth - # - # and - # - # "A note on error recovery in recursive descent parsers": - # http:#portal.acm.org/citation.cfm?id=947902.947905 - # - # Later, Josef Grosch had some good ideas: - # - # "Efficient and Comfortable Error Recovery in Recursive Descent - # Parsers": - # ftp:#www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip - # - # Like Grosch I implement context-sensitive FOLLOW sets that are combined - # at run-time upon error to avoid overhead during parsing. - # - def getErrorRecoverySet(self, recognizer): - atn = recognizer._interp.atn - ctx = recognizer._ctx - recoverSet = IntervalSet() - while ctx is not None and ctx.invokingState>=0: - # compute what follows who invoked us - invokingState = atn.states[ctx.invokingState] - rt = invokingState.transitions[0] - follow = atn.nextTokens(rt.followState) - recoverSet.addSet(follow) - ctx = ctx.parentCtx - recoverSet.removeOne(Token.EPSILON) - return recoverSet - - # Consume tokens until one matches the given token set.# - def consumeUntil(self, recognizer, set_): - ttype = recognizer.getTokenStream().LA(1) - while ttype != Token.EOF and not ttype in set_: - recognizer.consume() - ttype = recognizer.getTokenStream().LA(1) - - -# -# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors -# by immediately canceling the parse operation with a -# {@link ParseCancellationException}. The implementation ensures that the -# {@link ParserRuleContext#exception} field is set for all parse tree nodes -# that were not completed prior to encountering the error. -# -#

      -# This error strategy is useful in the following scenarios.

      -# -#
        -#
      • Two-stage parsing: This error strategy allows the first -# stage of two-stage parsing to immediately terminate if an error is -# encountered, and immediately fall back to the second stage. In addition to -# avoiding wasted work by attempting to recover from errors here, the empty -# implementation of {@link BailErrorStrategy#sync} improves the performance of -# the first stage.
      • -#
      • Silent validation: When syntax errors are not being -# reported or logged, and the parse result is simply ignored if errors occur, -# the {@link BailErrorStrategy} avoids wasting work on recovering from errors -# when the result will be ignored either way.
      • -#
      -# -#

      -# {@code myparser.setErrorHandler(new BailErrorStrategy());}

      -# -# @see Parser#setErrorHandler(ANTLRErrorStrategy) -# -class BailErrorStrategy(DefaultErrorStrategy): - # Instead of recovering from exception {@code e}, re-throw it wrapped - # in a {@link ParseCancellationException} so it is not caught by the - # rule function catches. Use {@link Exception#getCause()} to get the - # original {@link RecognitionException}. - # - def recover(self, recognizer, e): - context = recognizer._ctx - while context is not None: - context.exception = e - context = context.parentCtx - raise ParseCancellationException(e) - - # Make sure we don't attempt to recover inline; if the parser - # successfully recovers, it won't throw an exception. - # - def recoverInline(self, recognizer): - self.recover(recognizer, InputMismatchException(recognizer)) - - # Make sure we don't attempt to recover from problems in subrules.# - def sync(self, recognizer): - pass diff --git a/runtime/Python2/src/antlr4/error/Errors.py b/runtime/Python2/src/antlr4/error/Errors.py deleted file mode 100644 index 55a3db8bce..0000000000 --- a/runtime/Python2/src/antlr4/error/Errors.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ -from antlr4.atn.Transition import PredicateTransition - - -class UnsupportedOperationException(Exception): - - def __init__(self, msg): - super(UnsupportedOperationException, self).__init__(msg) - -class IllegalStateException(Exception): - - def __init__(self, msg): - super(IllegalStateException, self).__init__(msg) - -class CancellationException(IllegalStateException): - - def __init__(self, msg): - super(CancellationException, self).__init__(msg) - -# The root of the ANTLR exception hierarchy. In general, ANTLR tracks just -# 3 kinds of errors: prediction errors, failed predicate errors, and -# mismatched input errors. In each case, the parser knows where it is -# in the input, where it is in the ATN, the rule invocation stack, -# and what kind of problem occurred. - -from antlr4.InputStream import InputStream -from antlr4.ParserRuleContext import ParserRuleContext -from antlr4.Recognizer import Recognizer - -class RecognitionException(Exception): - - - def __init__(self, message=None, recognizer=None, input=None, ctx=None): - super(RecognitionException, self).__init__(message) - self.recognizer = recognizer - self.input = input - self.ctx = ctx - # The current {@link Token} when an error occurred. Since not all streams - # support accessing symbols by index, we have to track the {@link Token} - # instance itself. - self.offendingToken = None - # Get the ATN state number the parser was in at the time the error - # occurred. For {@link NoViableAltException} and - # {@link LexerNoViableAltException} exceptions, this is the - # {@link DecisionState} number. For others, it is the state whose outgoing - # edge we couldn't match. - self.offendingState = -1 - if recognizer is not None: - self.offendingState = recognizer.state - - #

      If the state number is not known, this method returns -1.

      - - # - # Gets the set of input symbols which could potentially follow the - # previously matched symbol at the time this exception was thrown. - # - #

      If the set of expected tokens is not known and could not be computed, - # this method returns {@code null}.

      - # - # @return The set of token types that could potentially follow the current - # state in the ATN, or {@code null} if the information is not available. - #/ - def getExpectedTokens(self): - if self.recognizer is not None: - return self.recognizer.atn.getExpectedTokens(self.offendingState, self.ctx) - else: - return None - - def __str__(self): - return unicode(self) - - -class LexerNoViableAltException(RecognitionException): - - def __init__(self, lexer, input, startIndex, deadEndConfigs): - super(LexerNoViableAltException, self).__init__(message=None, recognizer=lexer, input=input, ctx=None) - self.startIndex = startIndex - self.deadEndConfigs = deadEndConfigs - self.message = "" - - def __unicode__(self): - symbol = "" - if self.startIndex >= 0 and self.startIndex < self.input.size(): - symbol = self.input.getText(self.startIndex,self.startIndex) - # TODO symbol = Utils.escapeWhitespace(symbol, false); - return u"LexerNoViableAltException" + symbol - -# Indicates that the parser could not decide which of two or more paths -# to take based upon the remaining input. It tracks the starting token -# of the offending input and also knows where the parser was -# in the various paths when the error. Reported by reportNoViableAlternative() -# -class NoViableAltException(RecognitionException): - - def __init__(self, recognizer, input=None, startToken=None, offendingToken=None, deadEndConfigs=None, ctx=None): - if ctx is None: - ctx = recognizer._ctx - if offendingToken is None: - offendingToken = recognizer.getCurrentToken() - if startToken is None: - startToken = recognizer.getCurrentToken() - if input is None: - input = recognizer.getInputStream() - super(NoViableAltException, self).__init__(recognizer=recognizer, input=input, ctx=ctx) - # Which configurations did we try at input.index() that couldn't match input.LT(1)?# - self.deadEndConfigs = deadEndConfigs - # The token object at the start index; the input stream might - # not be buffering tokens so get a reference to it. (At the - # time the error occurred, of course the stream needs to keep a - # buffer all of the tokens but later we might not have access to those.) - self.startToken = startToken - self.offendingToken = offendingToken - -# This signifies any kind of mismatched input exceptions such as -# when the current input does not match the expected token. -# -class InputMismatchException(RecognitionException): - - def __init__(self, recognizer): - super(InputMismatchException, self).__init__(recognizer=recognizer, input=recognizer.getInputStream(), ctx=recognizer._ctx) - self.offendingToken = recognizer.getCurrentToken() - - -# A semantic predicate failed during validation. Validation of predicates -# occurs when normally parsing the alternative just like matching a token. -# Disambiguating predicate evaluation occurs when we test a predicate during -# prediction. - -class FailedPredicateException(RecognitionException): - - def __init__(self, recognizer, predicate=None, message=None): - super(FailedPredicateException, self).__init__(message=self.formatMessage(predicate,message), recognizer=recognizer, - input=recognizer.getInputStream(), ctx=recognizer._ctx) - s = recognizer._interp.atn.states[recognizer.state] - trans = s.transitions[0] - if isinstance(trans, PredicateTransition): - self.ruleIndex = trans.ruleIndex - self.predicateIndex = trans.predIndex - else: - self.ruleIndex = 0 - self.predicateIndex = 0 - self.predicate = predicate - self.offendingToken = recognizer.getCurrentToken() - - def formatMessage(self, predicate, message): - if message is not None: - return message - else: - return "failed predicate: {" + predicate + "}?" - -class ParseCancellationException(CancellationException): - - pass - diff --git a/runtime/Python2/src/antlr4/error/__init__.py b/runtime/Python2/src/antlr4/error/__init__.py deleted file mode 100644 index 216c000dc5..0000000000 --- a/runtime/Python2/src/antlr4/error/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'ericvergnaud' diff --git a/runtime/Python2/src/antlr4/tree/Chunk.py b/runtime/Python2/src/antlr4/tree/Chunk.py deleted file mode 100644 index 3ae0518001..0000000000 --- a/runtime/Python2/src/antlr4/tree/Chunk.py +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -class Chunk(object): - - def __str__(self): - return unicode(self) - - -class TagChunk(Chunk): - - def __init__(self, tag, label=None): - self.tag = tag - self.label = label - - def __unicode__(self): - if self.label is None: - return self.tag - else: - return self.label + ":" + self.tag - -class TextChunk(Chunk): - - def __init__(self, text): - self.text = text - - def __unicode__(self): - return "'" + self.text + "'" - diff --git a/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py b/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py deleted file mode 100644 index 8618173da8..0000000000 --- a/runtime/Python2/src/antlr4/tree/ParseTreeMatch.py +++ /dev/null @@ -1,120 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - - -# -# Represents the result of matching a {@link ParseTree} against a tree pattern. -# -from io import StringIO - - -class ParseTreeMatch(object): - - # - # Constructs a new instance of {@link ParseTreeMatch} from the specified - # parse tree and pattern. - # - # @param tree The parse tree to match against the pattern. - # @param pattern The parse tree pattern. - # @param labels A mapping from label names to collections of - # {@link ParseTree} objects located by the tree pattern matching process. - # @param mismatchedNode The first node which failed to match the tree - # pattern during the matching process. - # - # @exception IllegalArgumentException if {@code tree} is {@code null} - # @exception IllegalArgumentException if {@code pattern} is {@code null} - # @exception IllegalArgumentException if {@code labels} is {@code null} - # - def __init__(self, tree, pattern, labels, mismatchedNode): - if tree is None: - raise Exception("tree cannot be null") - if pattern is None: - raise Exception("pattern cannot be null") - if labels is None: - raise Exception("labels cannot be null") - self.tree = tree - self.pattern = pattern - self.labels = labels - self.mismatchedNode = mismatchedNode - - # - # Get the last node associated with a specific {@code label}. - # - #

      For example, for pattern {@code }, {@code get("id")} returns the - # node matched for that {@code ID}. If more than one node - # matched the specified label, only the last is returned. If there is - # no node associated with the label, this returns {@code null}.

      - # - #

      Pattern tags like {@code } and {@code } without labels are - # considered to be labeled with {@code ID} and {@code expr}, respectively.

      - # - # @param label The label to check. - # - # @return The last {@link ParseTree} to match a tag with the specified - # label, or {@code null} if no parse tree matched a tag with the label. - # - def get(self, label): - parseTrees = self.labels.get(label, None) - if parseTrees is None or len(parseTrees)==0: - return None - else: - return parseTrees[len(parseTrees)-1] - - # - # Return all nodes matching a rule or token tag with the specified label. - # - #

      If the {@code label} is the name of a parser rule or token in the - # grammar, the resulting list will contain both the parse trees matching - # rule or tags explicitly labeled with the label and the complete set of - # parse trees matching the labeled and unlabeled tags in the pattern for - # the parser rule or token. For example, if {@code label} is {@code "foo"}, - # the result will contain all of the following.

      - # - #
        - #
      • Parse tree nodes matching tags of the form {@code } and - # {@code }.
      • - #
      • Parse tree nodes matching tags of the form {@code }.
      • - #
      • Parse tree nodes matching tags of the form {@code }.
      • - #
      - # - # @param label The label. - # - # @return A collection of all {@link ParseTree} nodes matching tags with - # the specified {@code label}. If no nodes matched the label, an empty list - # is returned. - # - def getAll(self, label): - nodes = self.labels.get(label, None) - if nodes is None: - return list() - else: - return nodes - - - # - # Gets a value indicating whether the match operation succeeded. - # - # @return {@code true} if the match operation succeeded; otherwise, - # {@code false}. - # - def succeeded(self): - return self.mismatchedNode is None - - # - # {@inheritDoc} - # - def __str__(self): - return unicode(self) - - - def __unicode__(self): - with StringIO() as buf: - buf.write(u"Match ") - buf.write(u"succeeded" if self.succeeded() else "failed") - buf.write(u"; found ") - buf.write(unicode(len(self.labels))) - buf.write(u" labels") - return buf.getvalue() diff --git a/runtime/Python2/src/antlr4/tree/ParseTreePattern.py b/runtime/Python2/src/antlr4/tree/ParseTreePattern.py deleted file mode 100644 index 9f78304ad5..0000000000 --- a/runtime/Python2/src/antlr4/tree/ParseTreePattern.py +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# A pattern like {@code = ;} converted to a {@link ParseTree} by -# {@link ParseTreePatternMatcher#compile(String, int)}. -# -from antlr4.xpath.XPath import XPath - - -class ParseTreePattern(object): - - # Construct a new instance of the {@link ParseTreePattern} class. - # - # @param matcher The {@link ParseTreePatternMatcher} which created this - # tree pattern. - # @param pattern The tree pattern in concrete syntax form. - # @param patternRuleIndex The parser rule which serves as the root of the - # tree pattern. - # @param patternTree The tree pattern in {@link ParseTree} form. - # - def __init__(self, matcher, pattern, patternRuleIndex , patternTree): - self.matcher = matcher - self.patternRuleIndex = patternRuleIndex - self.pattern = pattern - self.patternTree = patternTree - - # - # Match a specific parse tree against this tree pattern. - # - # @param tree The parse tree to match against this tree pattern. - # @return A {@link ParseTreeMatch} object describing the result of the - # match operation. The {@link ParseTreeMatch#succeeded()} method can be - # used to determine whether or not the match was successful. - # - def match(self, tree): - return self.matcher.match(tree, self) - - # - # Determine whether or not a parse tree matches this tree pattern. - # - # @param tree The parse tree to match against this tree pattern. - # @return {@code true} if {@code tree} is a match for the current tree - # pattern; otherwise, {@code false}. - # - def matches(self, tree): - return self.matcher.match(tree, self).succeeded() - - # Find all nodes using XPath and then try to match those subtrees against - # this tree pattern. - # - # @param tree The {@link ParseTree} to match against this pattern. - # @param xpath An expression matching the nodes - # - # @return A collection of {@link ParseTreeMatch} objects describing the - # successful matches. Unsuccessful matches are omitted from the result, - # regardless of the reason for the failure. - # - def findAll(self, tree, xpath): - subtrees = XPath.findAll(tree, xpath, self.matcher.parser) - matches = list() - for t in subtrees: - match = self.match(t) - if match.succeeded(): - matches.append(match) - return matches diff --git a/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py b/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py deleted file mode 100644 index 9f0caffaae..0000000000 --- a/runtime/Python2/src/antlr4/tree/ParseTreePatternMatcher.py +++ /dev/null @@ -1,367 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# A tree pattern matching mechanism for ANTLR {@link ParseTree}s. -# -#

      Patterns are strings of source input text with special tags representing -# token or rule references such as:

      -# -#

      {@code = ;}

      -# -#

      Given a pattern start rule such as {@code statement}, this object constructs -# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr} -# subtree. Then the {@link #match} routines can compare an actual -# {@link ParseTree} from a parse with this pattern. Tag {@code } matches -# any {@code ID} token and tag {@code } references the result of the -# {@code expr} rule (generally an instance of {@code ExprContext}.

      -# -#

      Pattern {@code x = 0;} is a similar pattern that matches the same pattern -# except that it requires the identifier to be {@code x} and the expression to -# be {@code 0}.

      -# -#

      The {@link #matches} routines return {@code true} or {@code false} based -# upon a match for the tree rooted at the parameter sent in. The -# {@link #match} routines return a {@link ParseTreeMatch} object that -# contains the parse tree, the parse tree pattern, and a map from tag name to -# matched nodes (more below). A subtree that fails to match, returns with -# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not -# match.

      -# -#

      For efficiency, you can compile a tree pattern in string form to a -# {@link ParseTreePattern} object.

      -# -#

      See {@code TestParseTreeMatcher} for lots of examples. -# {@link ParseTreePattern} has two static helper methods: -# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that -# are easy to use but not super efficient because they create new -# {@link ParseTreePatternMatcher} objects each time and have to compile the -# pattern in string form before using it.

      -# -#

      The lexer and parser that you pass into the {@link ParseTreePatternMatcher} -# constructor are used to parse the pattern in string form. The lexer converts -# the {@code = ;} into a sequence of four tokens (assuming lexer -# throws out whitespace or puts it on a hidden channel). Be aware that the -# input stream is reset for the lexer (but not the parser; a -# {@link ParserInterpreter} is created to parse the input.). Any user-defined -# fields you have put into the lexer might get changed when this mechanism asks -# it to scan the pattern string.

      -# -#

      Normally a parser does not accept token {@code } as a valid -# {@code expr} but, from the parser passed in, we create a special version of -# the underlying grammar representation (an {@link ATN}) that allows imaginary -# tokens representing rules ({@code }) to match entire rules. We call -# these bypass alternatives.

      -# -#

      Delimiters are {@code <} and {@code >}, with {@code \} as the escape string -# by default, but you can set them to whatever you want using -# {@link #setDelimiters}. You must escape both start and stop strings -# {@code \<} and {@code \>}.

      -# -from antlr4 import CommonTokenStream, ParserRuleContext -from antlr4.InputStream import InputStream -from antlr4.ListTokenSource import ListTokenSource -from antlr4.Token import Token -from antlr4.error.ErrorStrategy import BailErrorStrategy -from antlr4.error.Errors import RecognitionException, ParseCancellationException -from antlr4.tree.Chunk import TagChunk, TextChunk -from antlr4.tree.RuleTagToken import RuleTagToken -from antlr4.tree.TokenTagToken import TokenTagToken -from antlr4.tree.Tree import TerminalNode, RuleNode - -class CannotInvokeStartRule(Exception): - - def __init__(self, e): - super(CannotInvokeStartRule, self).__init__(e) - -class StartRuleDoesNotConsumeFullPattern(Exception): - - pass - - -class ParseTreePatternMatcher(object): - - # Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and - # {@link Parser} object. The lexer input stream is altered for tokenizing - # the tree patterns. The parser is used as a convenient mechanism to get - # the grammar name, plus token, rule names. - def __init__(self, lexer, parser): - self.lexer = lexer - self.parser = parser - self.start = "<" - self.stop = ">" - self.escape = "\\" # e.g., \< and \> must escape BOTH! - - # Set the delimiters used for marking rule and token tags within concrete - # syntax used by the tree pattern parser. - # - # @param start The start delimiter. - # @param stop The stop delimiter. - # @param escapeLeft The escape sequence to use for escaping a start or stop delimiter. - # - # @exception IllegalArgumentException if {@code start} is {@code null} or empty. - # @exception IllegalArgumentException if {@code stop} is {@code null} or empty. - # - def setDelimiters(self, start, stop, escapeLeft): - if start is None or len(start)==0: - raise Exception("start cannot be null or empty") - if stop is None or len(stop)==0: - raise Exception("stop cannot be null or empty") - self.start = start - self.stop = stop - self.escape = escapeLeft - - # Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?# - def matchesRuleIndex(self, tree, pattern, patternRuleIndex): - p = self.compileTreePattern(pattern, patternRuleIndex) - return self.matches(tree, p) - - # Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a - # compiled pattern instead of a string representation of a tree pattern. - # - def matchesPattern(self, tree, pattern): - mismatchedNode = self.matchImpl(tree, pattern.patternTree, dict()) - return mismatchedNode is None - - # - # Compare {@code pattern} matched as rule {@code patternRuleIndex} against - # {@code tree} and return a {@link ParseTreeMatch} object that contains the - # matched elements, or the node at which the match failed. - # - def matchRuleIndex(self, tree, pattern, patternRuleIndex): - p = self.compileTreePattern(pattern, patternRuleIndex) - return self.matchPattern(tree, p) - - # - # Compare {@code pattern} matched against {@code tree} and return a - # {@link ParseTreeMatch} object that contains the matched elements, or the - # node at which the match failed. Pass in a compiled pattern instead of a - # string representation of a tree pattern. - # - def matchPattern(self, tree, pattern): - labels = dict() - mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels) - from antlr4.tree.ParseTreeMatch import ParseTreeMatch - return ParseTreeMatch(tree, pattern, labels, mismatchedNode) - - # - # For repeated use of a tree pattern, compile it to a - # {@link ParseTreePattern} using this method. - # - def compileTreePattern(self, pattern, patternRuleIndex): - tokenList = self.tokenize(pattern) - tokenSrc = ListTokenSource(tokenList) - tokens = CommonTokenStream(tokenSrc) - from antlr4.ParserInterpreter import ParserInterpreter - parserInterp = ParserInterpreter(self.parser.grammarFileName, self.parser.tokenNames, - self.parser.ruleNames, self.parser.getATNWithBypassAlts(),tokens) - tree = None - try: - parserInterp.setErrorHandler(BailErrorStrategy()) - tree = parserInterp.parse(patternRuleIndex) - except ParseCancellationException as e: - raise e.cause - except RecognitionException as e: - raise e - except Exception as e: - raise CannotInvokeStartRule(e) - - # Make sure tree pattern compilation checks for a complete parse - if tokens.LA(1)!=Token.EOF: - raise StartRuleDoesNotConsumeFullPattern() - - from antlr4.tree.ParseTreePattern import ParseTreePattern - return ParseTreePattern(self, pattern, patternRuleIndex, tree) - - # - # Recursively walk {@code tree} against {@code patternTree}, filling - # {@code match.}{@link ParseTreeMatch#labels labels}. - # - # @return the first node encountered in {@code tree} which does not match - # a corresponding node in {@code patternTree}, or {@code null} if the match - # was successful. The specific node returned depends on the matching - # algorithm used by the implementation, and may be overridden. - # - def matchImpl(self, tree, patternTree, labels): - if tree is None: - raise Exception("tree cannot be null") - if patternTree is None: - raise Exception("patternTree cannot be null") - - # x and , x and y, or x and x; or could be mismatched types - if isinstance(tree, TerminalNode) and isinstance(patternTree, TerminalNode ): - mismatchedNode = None - # both are tokens and they have same type - if tree.symbol.type == patternTree.symbol.type: - if isinstance( patternTree.symbol, TokenTagToken ): # x and - tokenTagToken = patternTree.symbol - # track label->list-of-nodes for both token name and label (if any) - self.map(labels, tokenTagToken.tokenName, tree) - if tokenTagToken.label is not None: - self.map(labels, tokenTagToken.label, tree) - elif tree.getText()==patternTree.getText(): - # x and x - pass - else: - # x and y - if mismatchedNode is None: - mismatchedNode = tree - else: - if mismatchedNode is None: - mismatchedNode = tree - - return mismatchedNode - - if isinstance(tree, ParserRuleContext) and isinstance(patternTree, ParserRuleContext): - mismatchedNode = None - # (expr ...) and - ruleTagToken = self.getRuleTagToken(patternTree) - if ruleTagToken is not None: - m = None - if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex: - # track label->list-of-nodes for both rule name and label (if any) - self.map(labels, ruleTagToken.ruleName, tree) - if ruleTagToken.label is not None: - self.map(labels, ruleTagToken.label, tree) - else: - if mismatchedNode is None: - mismatchedNode = tree - - return mismatchedNode - - # (expr ...) and (expr ...) - if tree.getChildCount()!=patternTree.getChildCount(): - if mismatchedNode is None: - mismatchedNode = tree - return mismatchedNode - - n = tree.getChildCount() - for i in range(0, n): - childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels) - if childMatch is not None: - return childMatch - - return mismatchedNode - - # if nodes aren't both tokens or both rule nodes, can't match - return tree - - def map(self, labels, label, tree): - v = labels.get(label, None) - if v is None: - v = list() - labels[label] = v - v.append(tree) - - # Is {@code t} {@code (expr )} subtree?# - def getRuleTagToken(self, tree): - if isinstance( tree, RuleNode ): - if tree.getChildCount()==1 and isinstance(tree.getChild(0), TerminalNode ): - c = tree.getChild(0) - if isinstance( c.symbol, RuleTagToken ): - return c.symbol - return None - - def tokenize(self, pattern): - # split pattern into chunks: sea (raw input) and islands (, ) - chunks = self.split(pattern) - - # create token stream from text and tags - tokens = list() - for chunk in chunks: - if isinstance( chunk, TagChunk ): - # add special rule token or conjure up new token from name - if chunk.tag[0].isupper(): - ttype = self.parser.getTokenType(chunk.tag) - if ttype==Token.INVALID_TYPE: - raise Exception("Unknown token " + str(chunk.tag) + " in pattern: " + pattern) - tokens.append(TokenTagToken(chunk.tag, ttype, chunk.label)) - elif chunk.tag[0].islower(): - ruleIndex = self.parser.getRuleIndex(chunk.tag) - if ruleIndex==-1: - raise Exception("Unknown rule " + str(chunk.tag) + " in pattern: " + pattern) - ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex] - tokens.append(RuleTagToken(chunk.tag, ruleImaginaryTokenType, chunk.label)) - else: - raise Exception("invalid tag: " + str(chunk.tag) + " in pattern: " + pattern) - else: - self.lexer.setInputStream(InputStream(chunk.text)) - t = self.lexer.nextToken() - while t.type!=Token.EOF: - tokens.append(t) - t = self.lexer.nextToken() - return tokens - - # Split {@code = ;} into 4 chunks for tokenizing by {@link #tokenize}.# - def split(self, pattern): - p = 0 - n = len(pattern) - chunks = list() - # find all start and stop indexes first, then collect - starts = list() - stops = list() - while p < n : - if p == pattern.find(self.escape + self.start, p): - p += len(self.escape) + len(self.start) - elif p == pattern.find(self.escape + self.stop, p): - p += len(self.escape) + len(self.stop) - elif p == pattern.find(self.start, p): - starts.append(p) - p += len(self.start) - elif p == pattern.find(self.stop, p): - stops.append(p) - p += len(self.stop) - else: - p += 1 - - nt = len(starts) - - if nt > len(stops): - raise Exception("unterminated tag in pattern: " + pattern) - if nt < len(stops): - raise Exception("missing start tag in pattern: " + pattern) - - for i in range(0, nt): - if starts[i] >= stops[i]: - raise Exception("tag delimiters out of order in pattern: " + pattern) - - # collect into chunks now - if nt==0: - chunks.append(TextChunk(pattern)) - - if nt>0 and starts[0]>0: # copy text up to first tag into chunks - text = pattern[0:starts[0]] - chunks.add(TextChunk(text)) - - for i in range(0, nt): - # copy inside of - tag = pattern[starts[i] + len(self.start) : stops[i]] - ruleOrToken = tag - label = None - colon = tag.find(':') - if colon >= 0: - label = tag[0:colon] - ruleOrToken = tag[colon+1 : len(tag)] - chunks.append(TagChunk(label, ruleOrToken)) - if i+1 < len(starts): - # copy from end of to start of next - text = pattern[stops[i] + len(self.stop) : starts[i + 1]] - chunks.append(TextChunk(text)) - - if nt > 0 : - afterLastTag = stops[nt - 1] + len(self.stop) - if afterLastTag < n : # copy text from end of last tag to end - text = pattern[afterLastTag : n] - chunks.append(TextChunk(text)) - - # strip out the escape sequences from text chunks but not tags - for i in range(0, len(chunks)): - c = chunks[i] - if isinstance( c, TextChunk ): - unescaped = c.text.replace(self.escape, "") - if len(unescaped) < len(c.text): - chunks[i] = TextChunk(unescaped) - return chunks diff --git a/runtime/Python2/src/antlr4/tree/RuleTagToken.py b/runtime/Python2/src/antlr4/tree/RuleTagToken.py deleted file mode 100644 index d63a3a53bb..0000000000 --- a/runtime/Python2/src/antlr4/tree/RuleTagToken.py +++ /dev/null @@ -1,48 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# A {@link Token} object representing an entire subtree matched by a parser -# rule; e.g., {@code }. These tokens are created for {@link TagChunk} -# chunks where the tag corresponds to a parser rule. -# -from antlr4.Token import Token - - -class RuleTagToken(Token): - # - # Constructs a new instance of {@link RuleTagToken} with the specified rule - # name, bypass token type, and label. - # - # @param ruleName The name of the parser rule this rule tag matches. - # @param bypassTokenType The bypass token type assigned to the parser rule. - # @param label The label associated with the rule tag, or {@code null} if - # the rule tag is unlabeled. - # - # @exception IllegalArgumentException if {@code ruleName} is {@code null} - # or empty. - - def __init__(self, ruleName, bypassTokenType, label=None): - if ruleName is None or len(ruleName)==0: - raise Exception("ruleName cannot be null or empty.") - self.source = None - self.type = bypassTokenType # token type of the token - self.channel = Token.DEFAULT_CHANNEL # The parser ignores everything not on DEFAULT_CHANNEL - self.start = -1 # optional; return -1 if not implemented. - self.stop = -1 # optional; return -1 if not implemented. - self.tokenIndex = -1 # from 0..n-1 of the token object in the input stream - self.line = 0 # line=1..n of the 1st character - self.column = -1 # beginning of the line at which it occurs, 0..n-1 - self.label = unicode(label) - self._text = self.getText() # text of the token. - self.ruleName = unicode(ruleName) - - - def getText(self): - if self.label is None: - return u"<" + self.ruleName + u">" - else: - return u"<" + self.label + ":" + self.ruleName + u">" diff --git a/runtime/Python2/src/antlr4/tree/TokenTagToken.py b/runtime/Python2/src/antlr4/tree/TokenTagToken.py deleted file mode 100644 index dba41f7852..0000000000 --- a/runtime/Python2/src/antlr4/tree/TokenTagToken.py +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# A {@link Token} object representing a token of a particular type; e.g., -# {@code }. These tokens are created for {@link TagChunk} chunks where the -# tag corresponds to a lexer rule or token type. -# -from antlr4.Token import CommonToken - - -class TokenTagToken(CommonToken): - - # Constructs a new instance of {@link TokenTagToken} with the specified - # token name, type, and label. - # - # @param tokenName The token name. - # @param type The token type. - # @param label The label associated with the token tag, or {@code null} if - # the token tag is unlabeled. - # - def __init__(self, tokenName, type, label=None): - super(TokenTagToken, self).__init__(type=type) - self.tokenName = unicode(tokenName) - self.label = unicode(label) - self._text = self.getText() - - # - # {@inheritDoc} - # - #

      The implementation for {@link TokenTagToken} returns the token tag - # formatted with {@code <} and {@code >} delimiters.

      - # - def getText(self): - if self.label is None: - return u"<" + self.tokenName + u">" - else: - return u"<" + self.label + u":" + self.tokenName + u">" - - #

      The implementation for {@link TokenTagToken} returns a string of the form - # {@code tokenName:type}.

      - # - def __unicode__(self): - return self.tokenName + u":" + unicode(self.type) diff --git a/runtime/Python2/src/antlr4/tree/Tree.py b/runtime/Python2/src/antlr4/tree/Tree.py deleted file mode 100644 index 5a11ffb66d..0000000000 --- a/runtime/Python2/src/antlr4/tree/Tree.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -#/ - - -# The basic notion of a tree has a parent, a payload, and a list of children. -# It is the most abstract interface for all the trees used by ANTLR. -#/ -from antlr4.Token import Token - -INVALID_INTERVAL = (-1, -2) - -class Tree(object): - - def __str__(self): - return unicode(self) - -class SyntaxTree(Tree): - pass - -class ParseTree(SyntaxTree): - pass - -class RuleNode(ParseTree): - pass - -class TerminalNode(ParseTree): - pass - -class ErrorNode(TerminalNode): - pass - -class ParseTreeVisitor(object): - def visit(self, tree): - return tree.accept(self) - - def visitChildren(self, node): - result = self.defaultResult() - n = node.getChildCount() - for i in range(n): - if not self.shouldVisitNextChild(node, result): - return result - - c = node.getChild(i) - childResult = c.accept(self) - result = self.aggregateResult(result, childResult) - - return result - - def visitTerminal(self, node): - return self.defaultResult() - - def visitErrorNode(self, node): - return self.defaultResult() - - def defaultResult(self): - return None - - def aggregateResult(self, aggregate, nextResult): - return nextResult - - def shouldVisitNextChild(self, node, currentResult): - return True - -class ParseTreeListener(object): - - def visitTerminal(self, node): - pass - - def visitErrorNode(self, node): - pass - - def enterEveryRule(self, ctx): - pass - - def exitEveryRule(self, ctx): - pass - -class TerminalNodeImpl(TerminalNode): - - def __init__(self, symbol): - self.parentCtx = None - self.symbol = symbol - - def getChild(self, i): - return None - - def getSymbol(self): - return self.symbol - - def getParent(self): - return self.parentCtx - - def getPayload(self): - return self.symbol - - def getSourceInterval(self): - if self.symbol is None: - return INVALID_INTERVAL - tokenIndex = self.symbol.tokenIndex - return (tokenIndex, tokenIndex) - - def getChildCount(self): - return 0 - - def accept(self, visitor): - return visitor.visitTerminal(self) - - def getText(self): - return unicode(self.symbol.text) - - def __unicode__(self): - if self.symbol.type == Token.EOF: - return u"" - else: - return unicode(self.symbol.text) - -# Represents a token that was consumed during resynchronization -# rather than during a valid match operation. For example, -# we will create this kind of a node during single token insertion -# and deletion as well as during "consume until error recovery set" -# upon no viable alternative exceptions. - -class ErrorNodeImpl(TerminalNodeImpl,ErrorNode): - - def __init__(self, token): - super(ErrorNodeImpl, self).__init__(token) - - def accept(self, visitor): - return visitor.visitErrorNode(self) - - -class ParseTreeWalker(object): - - DEFAULT = None - - def walk(self, listener, t): - """ - Performs a walk on the given parse tree starting at the root and going down recursively - with depth-first search. On each node, {@link ParseTreeWalker#enterRule} is called before - recursively walking down into child nodes, then - {@link ParseTreeWalker#exitRule} is called after the recursive call to wind up. - @param listener The listener used by the walker to process grammar rules - @param t The parse tree to be walked on - """ - if isinstance(t, ErrorNode): - listener.visitErrorNode(t) - return - elif isinstance(t, TerminalNode): - listener.visitTerminal(t) - return - self.enterRule(listener, t) - for child in t.getChildren(): - self.walk(listener, child) - self.exitRule(listener, t) - - # - # The discovery of a rule node, involves sending two events: the generic - # {@link ParseTreeListener#enterEveryRule} and a - # {@link RuleContext}-specific event. First we trigger the generic and then - # the rule specific. We to them in reverse order upon finishing the node. - # - def enterRule(self, listener, r): - """ - Enters a grammar rule by first triggering the generic event {@link ParseTreeListener#enterEveryRule} - then by triggering the event specific to the given parse tree node - @param listener The listener responding to the trigger events - @param r The grammar rule containing the rule context - """ - ctx = r.getRuleContext() - listener.enterEveryRule(ctx) - ctx.enterRule(listener) - - def exitRule(self, listener, r): - """ - Exits a grammar rule by first triggering the event specific to the given parse tree node - then by triggering the generic event {@link ParseTreeListener#exitEveryRule} - @param listener The listener responding to the trigger events - @param r The grammar rule containing the rule context - """ - ctx = r.getRuleContext() - ctx.exitRule(listener) - listener.exitEveryRule(ctx) - -ParseTreeWalker.DEFAULT = ParseTreeWalker() \ No newline at end of file diff --git a/runtime/Python2/src/antlr4/tree/Trees.py b/runtime/Python2/src/antlr4/tree/Trees.py deleted file mode 100644 index fd2ab0f8f3..0000000000 --- a/runtime/Python2/src/antlr4/tree/Trees.py +++ /dev/null @@ -1,110 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - - -# A set of utility routines useful for all kinds of ANTLR trees.# -from io import StringIO -import antlr4 -from antlr4.atn.ATN import ATN -from antlr4.Token import Token -from antlr4.Utils import escapeWhitespace -from antlr4.tree.Tree import RuleNode, ErrorNode, TerminalNode - -class Trees(object): - - # Print out a whole tree in LISP form. {@link #getNodeText} is used on the - # node payloads to get the text for the nodes. Detect - # parse trees and extract data appropriately. - @classmethod - def toStringTree(cls, t, ruleNames=None, recog=None): - if recog is not None: - ruleNames = recog.ruleNames - s = escapeWhitespace(cls.getNodeText(t, ruleNames), False) - if t.getChildCount()==0: - return s - with StringIO() as buf: - buf.write(u"(") - buf.write(s) - buf.write(u' ') - for i in range(0, t.getChildCount()): - if i > 0: - buf.write(u' ') - buf.write(cls.toStringTree(t.getChild(i), ruleNames)) - buf.write(u")") - return buf.getvalue() - - @classmethod - def getNodeText(cls, t, ruleNames=None, recog=None): - if recog is not None: - ruleNames = recog.ruleNames - if ruleNames is not None: - if isinstance(t, RuleNode): - if t.getAltNumber()!=ATN.INVALID_ALT_NUMBER: - return ruleNames[t.getRuleIndex()]+":"+str(t.getAltNumber()) - return ruleNames[t.getRuleIndex()] - elif isinstance( t, ErrorNode): - return unicode(t) - elif isinstance(t, TerminalNode): - if t.symbol is not None: - return t.symbol.text - # no recog for rule names - payload = t.getPayload() - if isinstance(payload, Token ): - return payload.text - return unicode(t.getPayload()) - - - # Return ordered list of all children of this node - @classmethod - def getChildren(cls, t): - return [ t.getChild(i) for i in range(0, t.getChildCount()) ] - - # Return a list of all ancestors of this node. The first node of - # list is the root and the last is the parent of this node. - # - @classmethod - def getAncestors(cls, t): - ancestors = [] - t = t.getParent() - while t is not None: - ancestors.append(0, t) # insert at start - t = t.getParent() - return ancestors - - @classmethod - def findAllTokenNodes(cls, t, ttype): - return cls.findAllNodes(t, ttype, True) - - @classmethod - def findAllRuleNodes(cls, t, ruleIndex): - return cls.findAllNodes(t, ruleIndex, False) - - @classmethod - def findAllNodes(cls, t, index, findTokens): - nodes = [] - cls._findAllNodes(t, index, findTokens, nodes) - return nodes - - @classmethod - def _findAllNodes(cls, t, index, findTokens, nodes): - from antlr4.ParserRuleContext import ParserRuleContext - # check this node (the root) first - if findTokens and isinstance(t, TerminalNode): - if t.symbol.type==index: - nodes.append(t) - elif not findTokens and isinstance(t, ParserRuleContext): - if t.ruleIndex == index: - nodes.append(t) - # check children - for i in range(0, t.getChildCount()): - cls._findAllNodes(t.getChild(i), index, findTokens, nodes) - - @classmethod - def descendants(cls, t): - nodes = [t] - for i in range(0, t.getChildCount()): - nodes.extend(cls.descendants(t.getChild(i))) - return nodes diff --git a/runtime/Python2/src/antlr4/tree/__init__.py b/runtime/Python2/src/antlr4/tree/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/runtime/Python2/src/antlr4/xpath/XPath.py b/runtime/Python2/src/antlr4/xpath/XPath.py deleted file mode 100644 index 92f5e0dac3..0000000000 --- a/runtime/Python2/src/antlr4/xpath/XPath.py +++ /dev/null @@ -1,269 +0,0 @@ -# -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. -# - -# -# Represent a subset of XPath XML path syntax for use in identifying nodes in -# parse trees. -# -#

      -# Split path into words and separators {@code /} and {@code //} via ANTLR -# itself then walk path elements from left to right. At each separator-word -# pair, find set of nodes. Next stage uses those as work list.

      -# -#

      -# The basic interface is -# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}. -# But that is just shorthand for:

      -# -#
      -# {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
      -# return p.{@link #evaluate evaluate}(tree);
      -# 
      -# -#

      -# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this -# allows operators:

      -# -#
      -#
      /
      root
      -#
      //
      anywhere
      -#
      !
      invert; this must appear directly after root or anywhere -# operator
      -#
      -# -#

      -# and path elements:

      -# -#
      -#
      ID
      token name
      -#
      'string'
      any string literal token from the grammar
      -#
      expr
      rule name
      -#
      *
      wildcard matching any node
      -#
      -# -#

      -# Whitespace is not allowed.

      -# -from antlr4 import CommonTokenStream, DFA, PredictionContextCache, Lexer, LexerATNSimulator, ParserRuleContext, TerminalNode -from antlr4.InputStream import InputStream -from antlr4.Parser import Parser -from antlr4.RuleContext import RuleContext -from antlr4.Token import Token -from antlr4.atn.ATNDeserializer import ATNDeserializer -from antlr4.error.ErrorListener import ErrorListener -from antlr4.error.Errors import LexerNoViableAltException -from antlr4.tree.Tree import ParseTree -from antlr4.tree.Trees import Trees -from io import StringIO -from antlr4.xpath.XPathLexer import XPathLexer - - -class XPath(object): - - WILDCARD = "*" # word not operator/separator - NOT = "!" # word for invert operator - - def __init__(self, parser:Parser, path:str): - self.parser = parser - self.path = path - self.elements = self.split(path) - - def split(self, path:str): - input = InputStream(path) - lexer = XPathLexer(input) - def recover(self, e): - raise e - lexer.recover = recover - lexer.removeErrorListeners() - lexer.addErrorListener(ErrorListener()) # XPathErrorListener does no more - tokenStream = CommonTokenStream(lexer) - try: - tokenStream.fill() - except LexerNoViableAltException as e: - pos = lexer.column - msg = "Invalid tokens or characters at index %d in path '%s'" % (pos, path) - raise Exception(msg, e) - - tokens = iter(tokenStream.tokens) - elements = list() - for el in tokens: - invert = False - anywhere = False - # Check for path separators, if none assume root - if el.type in [XPathLexer.ROOT, XPathLexer.ANYWHERE]: - anywhere = el.type == XPathLexer.ANYWHERE - next_el = next(tokens, None) - if not next_el: - raise Exception('Missing element after %s' % el.getText()) - else: - el = next_el - # Check for bangs - if el.type == XPathLexer.BANG: - invert = True - next_el = next(tokens, None) - if not next_el: - raise Exception('Missing element after %s' % el.getText()) - else: - el = next_el - # Add searched element - if el.type in [XPathLexer.TOKEN_REF, XPathLexer.RULE_REF, XPathLexer.WILDCARD, XPathLexer.STRING]: - element = self.getXPathElement(el, anywhere) - element.invert = invert - elements.append(element) - elif el.type==Token.EOF: - break - else: - raise Exception("Unknown path element %s" % lexer.symbolicNames[el.type]) - return elements - - # - # Convert word like {@code#} or {@code ID} or {@code expr} to a path - # element. {@code anywhere} is {@code true} if {@code //} precedes the - # word. - # - def getXPathElement(self, wordToken:Token, anywhere:bool): - if wordToken.type==Token.EOF: - raise Exception("Missing path element at end of path") - - word = wordToken.text - if wordToken.type==XPathLexer.WILDCARD : - return XPathWildcardAnywhereElement() if anywhere else XPathWildcardElement() - - elif wordToken.type in [XPathLexer.TOKEN_REF, XPathLexer.STRING]: - tsource = self.parser.getTokenStream().tokenSource - - ttype = Token.INVALID_TYPE - if wordToken.type == XPathLexer.TOKEN_REF: - if word in tsource.ruleNames: - ttype = tsource.ruleNames.index(word) + 1 - else: - if word in tsource.literalNames: - ttype = tsource.literalNames.index(word) - - if ttype == Token.INVALID_TYPE: - raise Exception("%s at index %d isn't a valid token name" % (word, wordToken.tokenIndex)) - return XPathTokenAnywhereElement(word, ttype) if anywhere else XPathTokenElement(word, ttype) - - else: - ruleIndex = self.parser.ruleNames.index(word) if word in self.parser.ruleNames else -1 - - if ruleIndex == -1: - raise Exception("%s at index %d isn't a valid rule name" % (word, wordToken.tokenIndex)) - return XPathRuleAnywhereElement(word, ruleIndex) if anywhere else XPathRuleElement(word, ruleIndex) - - - @staticmethod - def findAll(tree:ParseTree, xpath:str, parser:Parser): - p = XPath(parser, xpath) - return p.evaluate(tree) - - # - # Return a list of all nodes starting at {@code t} as root that satisfy the - # path. The root {@code /} is relative to the node passed to - # {@link #evaluate}. - # - def evaluate(self, t:ParseTree): - dummyRoot = ParserRuleContext() - dummyRoot.children = [t] # don't set t's parent. - - work = [dummyRoot] - for element in self.elements: - work_next = list() - for node in work: - if not isinstance(node, TerminalNode) and node.children: - # only try to match next element if it has children - # e.g., //func/*/stat might have a token node for which - # we can't go looking for stat nodes. - matching = element.evaluate(node) - - # See issue antlr#370 - Prevents XPath from returning the - # same node multiple times - matching = filter(lambda m: m not in work_next, matching) - - work_next.extend(matching) - work = work_next - - return work - - -class XPathElement(object): - - def __init__(self, nodeName:str): - self.nodeName = nodeName - self.invert = False - - def __str__(self): - return type(self).__name__ + "[" + ("!" if self.invert else "") + self.nodeName + "]" - - - -# -# Either {@code ID} at start of path or {@code ...//ID} in middle of path. -# -class XPathRuleAnywhereElement(XPathElement): - - def __init__(self, ruleName:str, ruleIndex:int): - super().__init__(ruleName) - self.ruleIndex = ruleIndex - - def evaluate(self, t:ParseTree): - # return all ParserRuleContext descendants of t that match ruleIndex (or do not match if inverted) - return filter(lambda c: isinstance(c, ParserRuleContext) and (self.invert ^ (c.getRuleIndex() == self.ruleIndex)), Trees.descendants(t)) - -class XPathRuleElement(XPathElement): - - def __init__(self, ruleName:str, ruleIndex:int): - super().__init__(ruleName) - self.ruleIndex = ruleIndex - - def evaluate(self, t:ParseTree): - # return all ParserRuleContext children of t that match ruleIndex (or do not match if inverted) - return filter(lambda c: isinstance(c, ParserRuleContext) and (self.invert ^ (c.getRuleIndex() == self.ruleIndex)), Trees.getChildren(t)) - -class XPathTokenAnywhereElement(XPathElement): - - def __init__(self, ruleName:str, tokenType:int): - super().__init__(ruleName) - self.tokenType = tokenType - - def evaluate(self, t:ParseTree): - # return all TerminalNode descendants of t that match tokenType (or do not match if inverted) - return filter(lambda c: isinstance(c, TerminalNode) and (self.invert ^ (c.symbol.type == self.tokenType)), Trees.descendants(t)) - -class XPathTokenElement(XPathElement): - - def __init__(self, ruleName:str, tokenType:int): - super().__init__(ruleName) - self.tokenType = tokenType - - def evaluate(self, t:ParseTree): - # return all TerminalNode children of t that match tokenType (or do not match if inverted) - return filter(lambda c: isinstance(c, TerminalNode) and (self.invert ^ (c.symbol.type == self.tokenType)), Trees.getChildren(t)) - - -class XPathWildcardAnywhereElement(XPathElement): - - def __init__(self): - super().__init__(XPath.WILDCARD) - - def evaluate(self, t:ParseTree): - if self.invert: - return list() # !* is weird but valid (empty) - else: - return Trees.descendants(t) - - -class XPathWildcardElement(XPathElement): - - def __init__(self): - super().__init__(XPath.WILDCARD) - - - def evaluate(self, t:ParseTree): - if self.invert: - return list() # !* is weird but valid (empty) - else: - return Trees.getChildren(t) diff --git a/runtime/Python2/src/antlr4/xpath/XPathLexer.g4 b/runtime/Python2/src/antlr4/xpath/XPathLexer.g4 deleted file mode 100644 index c4b1e173df..0000000000 --- a/runtime/Python2/src/antlr4/xpath/XPathLexer.g4 +++ /dev/null @@ -1,45 +0,0 @@ -lexer grammar XPathLexer; - -tokens { TOKEN_REF, RULE_REF } - -ANYWHERE : '//' ; -ROOT : '/' ; -WILDCARD : '*' ; -BANG : '!' ; - -ID : NameStartChar NameChar* - { - char = self.text[0] - if char.isupper(): - self.type = XPathLexer.TOKEN_REF - else: - self.type = XPathLexer.RULE_REF - } - ; - -fragment -NameChar : NameStartChar - | '0'..'9' - | '_' - | '\u00B7' - | '\u0300'..'\u036F' - | '\u203F'..'\u2040' - ; - -fragment -NameStartChar - : 'A'..'Z' | 'a'..'z' - | '\u00C0'..'\u00D6' - | '\u00D8'..'\u00F6' - | '\u00F8'..'\u02FF' - | '\u0370'..'\u037D' - | '\u037F'..'\u1FFF' - | '\u200C'..'\u200D' - | '\u2070'..'\u218F' - | '\u2C00'..'\u2FEF' - | '\u3001'..'\uD7FF' - | '\uF900'..'\uFDCF' - | '\uFDF0'..'\uFFFD' - ; // ignores | ['\u10000-'\uEFFFF] ; - -STRING : '\'' .*? '\'' ; diff --git a/runtime/Python2/src/antlr4/xpath/XPathLexer.py b/runtime/Python2/src/antlr4/xpath/XPathLexer.py deleted file mode 100644 index f154d90fc7..0000000000 --- a/runtime/Python2/src/antlr4/xpath/XPathLexer.py +++ /dev/null @@ -1,93 +0,0 @@ -# Generated from XPathLexer.g4 by ANTLR 4.11.2-SNAPSHOT -# encoding: utf-8 -from __future__ import print_function -from antlr4 import * -from io import StringIO -import sys - - -def serializedATN(): - return [ - 4,0,8,50,6,-1,2,0,7,0,2,1,7,1,2,2,7,2,2,3,7,3,2,4,7,4,2,5,7,5,2, - 6,7,6,2,7,7,7,1,0,1,0,1,0,1,1,1,1,1,2,1,2,1,3,1,3,1,4,1,4,5,4,29, - 8,4,10,4,12,4,32,9,4,1,4,1,4,1,5,1,5,3,5,38,8,5,1,6,1,6,1,7,1,7, - 5,7,44,8,7,10,7,12,7,47,9,7,1,7,1,7,1,45,0,8,1,3,3,4,5,5,7,6,9,7, - 11,0,13,0,15,8,1,0,2,5,0,48,57,95,95,183,183,768,879,8255,8256,13, - 0,65,90,97,122,192,214,216,246,248,767,880,893,895,8191,8204,8205, - 8304,8591,11264,12271,12289,55295,63744,64975,65008,65533,50,0,1, - 1,0,0,0,0,3,1,0,0,0,0,5,1,0,0,0,0,7,1,0,0,0,0,9,1,0,0,0,0,15,1,0, - 0,0,1,17,1,0,0,0,3,20,1,0,0,0,5,22,1,0,0,0,7,24,1,0,0,0,9,26,1,0, - 0,0,11,37,1,0,0,0,13,39,1,0,0,0,15,41,1,0,0,0,17,18,5,47,0,0,18, - 19,5,47,0,0,19,2,1,0,0,0,20,21,5,47,0,0,21,4,1,0,0,0,22,23,5,42, - 0,0,23,6,1,0,0,0,24,25,5,33,0,0,25,8,1,0,0,0,26,30,3,13,6,0,27,29, - 3,11,5,0,28,27,1,0,0,0,29,32,1,0,0,0,30,28,1,0,0,0,30,31,1,0,0,0, - 31,33,1,0,0,0,32,30,1,0,0,0,33,34,6,4,0,0,34,10,1,0,0,0,35,38,3, - 13,6,0,36,38,7,0,0,0,37,35,1,0,0,0,37,36,1,0,0,0,38,12,1,0,0,0,39, - 40,7,1,0,0,40,14,1,0,0,0,41,45,5,39,0,0,42,44,9,0,0,0,43,42,1,0, - 0,0,44,47,1,0,0,0,45,46,1,0,0,0,45,43,1,0,0,0,46,48,1,0,0,0,47,45, - 1,0,0,0,48,49,5,39,0,0,49,16,1,0,0,0,4,0,30,37,45,1,1,4,0 - ] - -class XPathLexer(Lexer): - - atn = ATNDeserializer().deserialize(serializedATN()) - - decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] - - TOKEN_REF = 1 - RULE_REF = 2 - ANYWHERE = 3 - ROOT = 4 - WILDCARD = 5 - BANG = 6 - ID = 7 - STRING = 8 - - channelNames = [ u"DEFAULT_TOKEN_CHANNEL", u"HIDDEN" ] - - modeNames = [ u"DEFAULT_MODE" ] - - literalNames = [ u"", - u"'//'", u"'/'", u"'*'", u"'!'" ] - - symbolicNames = [ u"", - u"TOKEN_REF", u"RULE_REF", u"ANYWHERE", u"ROOT", u"WILDCARD", - u"BANG", u"ID", u"STRING" ] - - ruleNames = [ u"ANYWHERE", u"ROOT", u"WILDCARD", u"BANG", u"ID", u"NameChar", - u"NameStartChar", u"STRING" ] - - grammarFileName = u"XPathLexer.g4" - - def __init__(self, input=None, output=sys.stdout): - super(XPathLexer, self).__init__(input, output=output) - self.checkVersion("4.11.2-SNAPSHOT") - self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) - self._actions = None - self._predicates = None - - - def action(self, localctx, ruleIndex, actionIndex): - if self._actions is None: - actions = dict() - actions[4] = self.ID_action - self._actions = actions - action = self._actions.get(ruleIndex, None) - if action is not None: - action(localctx, actionIndex) - else: - raise Exception("No registered action for:" + str(ruleIndex)) - - - def ID_action(self, localctx , actionIndex): - if actionIndex == 0: - - char = self.text[0] - if char.isupper(): - self.type = XPathLexer.TOKEN_REF - else: - self.type = XPathLexer.RULE_REF - - - - diff --git a/runtime/Python2/src/antlr4/xpath/__init__.py b/runtime/Python2/src/antlr4/xpath/__init__.py deleted file mode 100644 index 216c000dc5..0000000000 --- a/runtime/Python2/src/antlr4/xpath/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__author__ = 'ericvergnaud' diff --git a/runtime/Python2/tests/TestTokenStreamRewriter.py b/runtime/Python2/tests/TestTokenStreamRewriter.py deleted file mode 100644 index 15933e5372..0000000000 --- a/runtime/Python2/tests/TestTokenStreamRewriter.py +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. -# Use of this file is governed by the BSD 3-clause license that -# can be found in the LICENSE.txt file in the project root. - -import unittest - -from mocks.TestLexer import TestLexer, TestLexer2 -from antlr4.TokenStreamRewriter import TokenStreamRewriter -from antlr4.InputStream import InputStream -from antlr4.CommonTokenStream import CommonTokenStream - - -class TestTokenStreamRewriter(unittest.TestCase): - def testInsertBeforeIndexZero(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - rewriter.insertBeforeIndex(0, '0') - - self.assertEquals(rewriter.getDefaultText(), '0abc') - - def testInsertAfterLastIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - rewriter.insertAfter(10, 'x') - - self.assertEquals(rewriter.getDefaultText(), 'abcx') - - def test2InsertBeforeAfterMiddleIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'x') - rewriter.insertAfter(1, 'x') - - self.assertEquals(rewriter.getDefaultText(), 'axbxc') - - def testReplaceIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(0, 'x') - - self.assertEquals(rewriter.getDefaultText(), 'xbc') - - def testReplaceLastIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(2, 'x') - - self.assertEquals(rewriter.getDefaultText(), 'abx') - - def testReplaceMiddleIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(1, 'x') - - self.assertEquals(rewriter.getDefaultText(), 'axc') - - def testToStringStartStop(self): - input = InputStream('x = 3 * 0;') - lexer = TestLexer2(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(4, 8, '0') - - self.assertEquals(rewriter.getDefaultText(), 'x = 0;') - self.assertEquals(rewriter.getText('default', 0, 9), 'x = 0;') - self.assertEquals(rewriter.getText('default', 4, 8), '0') - - def testToStringStartStop2(self): - input = InputStream('x = 3 * 0 + 2 * 0;') - lexer = TestLexer2(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - self.assertEquals('x = 3 * 0 + 2 * 0;', rewriter.getDefaultText()) - - # replace 3 * 0 with 0 - rewriter.replaceRange(4, 8, '0') - self.assertEquals('x = 0 + 2 * 0;', rewriter.getDefaultText()) - self.assertEquals('x = 0 + 2 * 0;', rewriter.getText('default', 0, 17)) - self.assertEquals('0', rewriter.getText('default', 4, 8)) - self.assertEquals('x = 0', rewriter.getText('default', 0, 8)) - self.assertEquals('2 * 0', rewriter.getText('default', 12, 16)) - - rewriter.insertAfter(17, "// comment") - self.assertEquals('2 * 0;// comment', rewriter.getText('default', 12, 18)) - - self.assertEquals('x = 0', rewriter.getText('default', 0, 8)) - - def test2ReplaceMiddleIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(1, 'x') - rewriter.replaceIndex(1, 'y') - - self.assertEquals('ayc', rewriter.getDefaultText()) - - def test2ReplaceMiddleIndex1InsertBefore(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(0, "_") - rewriter.replaceIndex(1, 'x') - rewriter.replaceIndex(1, 'y') - - self.assertEquals('_ayc', rewriter.getDefaultText()) - - def test2InsertMiddleIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'x') - rewriter.insertBeforeIndex(1, 'y') - - self.assertEquals('ayxbc', rewriter.getDefaultText()) - - def testReplaceThenDeleteMiddleIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(0, 2, 'x') - rewriter.insertBeforeIndex(1, '0') - - with self.assertRaises(ValueError) as ctx: - rewriter.getDefaultText() - self.assertEquals( - 'insert op ,1:1]:"0"> within boundaries of previous ,1:0]..[@2,2:2=\'c\',<3>,1:2]:"x">', - ctx.exception.message - ) - - def testInsertThenReplaceSameIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(0, '0') - rewriter.replaceIndex(0, 'x') - - self.assertEquals('0xbc', rewriter.getDefaultText()) - - def test2InsertThenReplaceIndex0(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(0, 'x') - rewriter.insertBeforeIndex(0, 'y') - rewriter.replaceIndex(0, 'z') - - self.assertEquals('yxzbc', rewriter.getDefaultText()) - - def testReplaceThenInsertBeforeLastIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(2, 'x') - rewriter.insertBeforeIndex(2, 'y') - - self.assertEquals('abyx', rewriter.getDefaultText()) - - def testReplaceThenInsertAfterLastIndex(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(2, 'x') - rewriter.insertAfter(2, 'y') - - self.assertEquals('abxy', rewriter.getDefaultText()) - - def testReplaceRangeThenInsertAtLeftEdge(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'x') - rewriter.insertBeforeIndex(2, 'y') - - self.assertEquals('abyxba', rewriter.getDefaultText()) - - def testReplaceRangeThenInsertAtRightEdge(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'x') - rewriter.insertBeforeIndex(4, 'y') - - with self.assertRaises(ValueError) as ctx: - rewriter.getDefaultText() - msg = ctx.exception.message - self.assertEquals( - "insert op ,1:4]:\"y\"> within boundaries of previous ,1:2]..[@4,4:4='c',<3>,1:4]:\"x\">", - msg - ) - - def testReplaceRangeThenInsertAfterRightEdge(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'x') - rewriter.insertAfter(4, 'y') - - self.assertEquals('abxyba', rewriter.getDefaultText()) - - def testReplaceAll(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(0, 6, 'x') - - self.assertEquals('x', rewriter.getDefaultText()) - - def testReplaceSubsetThenFetch(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'xyz') - - self.assertEquals('abxyzba', rewriter.getDefaultText()) - - def testReplaceThenReplaceSuperset(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'xyz') - rewriter.replaceRange(3, 5, 'foo') - - with self.assertRaises(ValueError) as ctx: - rewriter.getDefaultText() - msg = ctx.exception.message - self.assertEquals( - """replace op boundaries of ,1:3]..[@5,5:5='b',<2>,1:5]:"foo"> overlap with previous ,1:2]..[@4,4:4='c',<3>,1:4]:"xyz">""", - msg - ) - - def testReplaceThenReplaceLowerIndexedSuperset(self): - input = InputStream('abcccba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 4, 'xyz') - rewriter.replaceRange(1, 3, 'foo') - - with self.assertRaises(ValueError) as ctx: - rewriter.getDefaultText() - msg = ctx.exception.message - self.assertEquals( - """replace op boundaries of ,1:1]..[@3,3:3='c',<3>,1:3]:"foo"> overlap with previous ,1:2]..[@4,4:4='c',<3>,1:4]:"xyz">""", - msg - ) - - def testReplaceSingleMiddleThenOverlappingSuperset(self): - input = InputStream('abcba') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceIndex(2, 'xyz') - rewriter.replaceRange(0, 3, 'foo') - - self.assertEquals('fooa', rewriter.getDefaultText()) - - def testCombineInserts(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(0, 'x') - rewriter.insertBeforeIndex(0, 'y') - - self.assertEquals('yxabc', rewriter.getDefaultText()) - - def testCombine3Inserts(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'x') - rewriter.insertBeforeIndex(0, 'y') - rewriter.insertBeforeIndex(1, 'z') - - self.assertEquals('yazxbc', rewriter.getDefaultText()) - - def testCombineInsertOnLeftWithReplace(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(0, 2, 'foo') - rewriter.insertBeforeIndex(0, 'z') - - self.assertEquals('zfoo', rewriter.getDefaultText()) - - def testCombineInsertOnLeftWithDelete(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.delete('default', 0, 2) - rewriter.insertBeforeIndex(0, 'z') - - self.assertEquals('z', rewriter.getDefaultText()) - - def testDisjointInserts(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'x') - rewriter.insertBeforeIndex(2, 'y') - rewriter.insertBeforeIndex(0, 'z') - - self.assertEquals('zaxbyc', rewriter.getDefaultText()) - - def testOverlappingReplace(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(1, 2, 'foo') - rewriter.replaceRange(0, 3, 'bar') - - self.assertEquals('bar', rewriter.getDefaultText()) - - def testOverlappingReplace2(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(0, 3, 'bar') - rewriter.replaceRange(1, 2, 'foo') - - with self.assertRaises(ValueError) as ctx: - rewriter.getDefaultText() - - self.assertEquals( - """replace op boundaries of ,1:1]..[@2,2:2='c',<3>,1:2]:"foo"> overlap with previous ,1:0]..[@3,3:2='',<-1>,1:3]:"bar">""", - ctx.exception.message - ) - - def testOverlappingReplace3(self): - input = InputStream('abcc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(1, 2, 'foo') - rewriter.replaceRange(0, 2, 'bar') - - self.assertEquals('barc', rewriter.getDefaultText()) - - def testOverlappingReplace4(self): - input = InputStream('abcc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(1, 2, 'foo') - rewriter.replaceRange(1, 3, 'bar') - - self.assertEquals('abar', rewriter.getDefaultText()) - - def testDropIdenticalReplace(self): - input = InputStream('abcc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(1, 2, 'foo') - rewriter.replaceRange(1, 2, 'foo') - - self.assertEquals('afooc', rewriter.getDefaultText()) - - def testDropPrevCoveredInsert(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'foo') - rewriter.replaceRange(1, 2, 'foo') - - self.assertEquals('afoofoo', rewriter.getDefaultText()) - - def testLeaveAloneDisjointInsert(self): - input = InputStream('abcc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'x') - rewriter.replaceRange(2, 3, 'foo') - - self.assertEquals('axbfoo', rewriter.getDefaultText()) - - def testLeaveAloneDisjointInsert2(self): - input = InputStream('abcc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.replaceRange(2, 3, 'foo') - rewriter.insertBeforeIndex(1, 'x') - - self.assertEquals('axbfoo', rewriter.getDefaultText()) - - def testInsertBeforeTokenThenDeleteThatToken(self): - input = InputStream('abc') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(1, 'foo') - rewriter.replaceRange(1, 2, 'foo') - - self.assertEquals('afoofoo', rewriter.getDefaultText()) - - def testPreservesOrderOfContiguousInserts(self): - """ - Test for fix for: https://github.com/antlr/antlr4/issues/550 - """ - input = InputStream('aa') - lexer = TestLexer(input) - stream = CommonTokenStream(lexer=lexer) - stream.fill() - rewriter = TokenStreamRewriter(tokens=stream) - - rewriter.insertBeforeIndex(0, '') - rewriter.insertAfter(0, '') - rewriter.insertBeforeIndex(1, '') - rewriter.insertAfter(1, '') - - self.assertEquals('aa', rewriter.getDefaultText()) - - -if __name__ == '__main__': - unittest.main() diff --git a/runtime/Python2/tests/__init__.py b/runtime/Python2/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/runtime/Python2/tests/mocks/TestLexer.py b/runtime/Python2/tests/mocks/TestLexer.py deleted file mode 100644 index 89c438b636..0000000000 --- a/runtime/Python2/tests/mocks/TestLexer.py +++ /dev/null @@ -1,101 +0,0 @@ -# Generated from /Users/lyga/Dropbox/code/python/antlr4-learn/test_grammar/T.g4 by ANTLR 4.5.3 -# encoding: utf-8 -from __future__ import print_function -from antlr4 import * -from io import StringIO - - -def serializedATN(): - with StringIO() as buf: - buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2") - buf.write(u"\5\17\b\1\4\2\t\2\4\3\t\3\4\4\t\4\3\2\3\2\3\3\3\3\3\4") - buf.write(u"\3\4\2\2\5\3\3\5\4\7\5\3\2\2\16\2\3\3\2\2\2\2\5\3\2\2") - buf.write(u"\2\2\7\3\2\2\2\3\t\3\2\2\2\5\13\3\2\2\2\7\r\3\2\2\2\t") - buf.write(u"\n\7c\2\2\n\4\3\2\2\2\13\f\7d\2\2\f\6\3\2\2\2\r\16\7") - buf.write(u"e\2\2\16\b\3\2\2\2\3\2\2") - return buf.getvalue() - - -class TestLexer(Lexer): - atn = ATNDeserializer().deserialize(serializedATN()) - - decisionsToDFA = [DFA(ds, i) for i, ds in enumerate(atn.decisionToState)] - - A = 1 - B = 2 - C = 3 - - modeNames = [u"DEFAULT_MODE"] - - literalNames = [u"", - u"'a'", u"'b'", u"'c'"] - - symbolicNames = [u"", - u"A", u"B", u"C"] - - ruleNames = [u"A", u"B", u"C"] - - grammarFileName = u"T.g4" - - def __init__(self, input=None): - super(TestLexer, self).__init__(input) - self.checkVersion("4.9.1") - self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) - self._actions = None - self._predicates = None - - - -def serializedATN2(): - with StringIO() as buf: - buf.write(u"\3\u0430\ud6d1\u8206\uad2d\u4417\uaef1\u8d80\uaadd\2") - buf.write(u"\t(\b\1\4\2\t\2\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t") - buf.write(u"\7\4\b\t\b\3\2\6\2\23\n\2\r\2\16\2\24\3\3\6\3\30\n\3") - buf.write(u"\r\3\16\3\31\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\b\6\b") - buf.write(u"%\n\b\r\b\16\b&\2\2\t\3\3\5\4\7\5\t\6\13\7\r\b\17\t\3") - buf.write(u"\2\2*\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2") - buf.write(u"\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\3\22\3\2\2\2\5") - buf.write(u"\27\3\2\2\2\7\33\3\2\2\2\t\35\3\2\2\2\13\37\3\2\2\2\r") - buf.write(u"!\3\2\2\2\17$\3\2\2\2\21\23\4c|\2\22\21\3\2\2\2\23\24") - buf.write(u"\3\2\2\2\24\22\3\2\2\2\24\25\3\2\2\2\25\4\3\2\2\2\26") - buf.write(u"\30\4\62;\2\27\26\3\2\2\2\30\31\3\2\2\2\31\27\3\2\2\2") - buf.write(u"\31\32\3\2\2\2\32\6\3\2\2\2\33\34\7=\2\2\34\b\3\2\2\2") - buf.write(u"\35\36\7?\2\2\36\n\3\2\2\2\37 \7-\2\2 \f\3\2\2\2!\"\7") - buf.write(u",\2\2\"\16\3\2\2\2#%\7\"\2\2$#\3\2\2\2%&\3\2\2\2&$\3") - buf.write(u"\2\2\2&\'\3\2\2\2\'\20\3\2\2\2\6\2\24\31&\2") - return buf.getvalue() - - -class TestLexer2(Lexer): - - atn = ATNDeserializer().deserialize(serializedATN2()) - - decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] - - - ID = 1 - INT = 2 - SEMI = 3 - ASSIGN = 4 - PLUS = 5 - MULT = 6 - WS = 7 - - modeNames = [ u"DEFAULT_MODE" ] - - literalNames = [ u"", - u"';'", u"'='", u"'+'", u"'*'" ] - - symbolicNames = [ u"", - u"ID", u"INT", u"SEMI", u"ASSIGN", u"PLUS", u"MULT", u"WS" ] - - ruleNames = [ u"ID", u"INT", u"SEMI", u"ASSIGN", u"PLUS", u"MULT", u"WS" ] - - grammarFileName = u"T2.g4" - - def __init__(self, input=None): - super(TestLexer2, self).__init__(input) - self.checkVersion("4.9.1") - self._interp = LexerATNSimulator(self, self.atn, self.decisionsToDFA, PredictionContextCache()) - self._actions = None - self._predicates = None diff --git a/runtime/Python2/tests/mocks/__init__.py b/runtime/Python2/tests/mocks/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/runtime/Python2/tests/run.py b/runtime/Python2/tests/run.py deleted file mode 100644 index c9ae18877e..0000000000 --- a/runtime/Python2/tests/run.py +++ /dev/null @@ -1,7 +0,0 @@ -import sys -import os -src_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src') -sys.path.insert(0,src_path) -from TestTokenStreamRewriter import TestTokenStreamRewriter -import unittest -unittest.main() \ No newline at end of file diff --git a/scripts/files-to-update.txt b/scripts/files-to-update.txt index a2ef45ada4..e3b0361791 100644 --- a/scripts/files-to-update.txt +++ b/scripts/files-to-update.txt @@ -10,8 +10,6 @@ runtime/Go/antlr/v4/recognizer.go runtime/Go/antlr/v4/antlrdoc.go runtime/Python3/pyproject.toml runtime/Python3/src/antlr4/Recognizer.py -runtime/Python2/setup.py -runtime/Python2/src/antlr4/Recognizer.py runtime/Dart/pubspec.yaml runtime/Dart/lib/src/runtime_meta_data.dart runtime/Java/src/org/antlr/v4/runtime/RuntimeMetaData.java @@ -46,7 +44,6 @@ runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/package_ts.json # Just documenting that these are generated... # runtime/Python3/src/antlr4/xpath/XPathLexer.py -# runtime/Python2/src/antlr4/xpath/XPathLexer.py # runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp # runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h # runtime/CSharp/src/Tree/Xpath/XPathLexer.cs diff --git a/scripts/github_release_notes.py b/scripts/github_release_notes.py index 08670087a8..05942b8c6e 100644 --- a/scripts/github_release_notes.py +++ b/scripts/github_release_notes.py @@ -5,7 +5,7 @@ from collections import Counter import sys -TARGETS = ['csharp', 'cpp', 'go', 'java', 'javascript', 'python2', 'python3', 'swift', 'php', 'dart'] +TARGETS = ['csharp', 'cpp', 'go', 'java', 'javascript', 'python3', 'swift', 'php', 'dart'] TOKEN=sys.argv[1] MILESTONE=sys.argv[2] diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java index 9bcbece485..863ed1ed3e 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java @@ -19,7 +19,6 @@ public void latinJavaEscape() { @Test public void latinPythonEscape() { - checkUnicodeEscape("\\u0061", 0x0061, "Python2"); checkUnicodeEscape("\\u0061", 0x0061, "Python3"); } @@ -35,7 +34,6 @@ public void bmpJavaEscape() { @Test public void bmpPythonEscape() { - checkUnicodeEscape("\\uABCD", 0xABCD, "Python2"); checkUnicodeEscape("\\uABCD", 0xABCD, "Python3"); } @@ -51,7 +49,6 @@ public void smpJavaEscape() { @Test public void smpPythonEscape() { - checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python2"); checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python3"); } diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg deleted file mode 100644 index 269e6c9d69..0000000000 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Python2/Python2.stg +++ /dev/null @@ -1,829 +0,0 @@ -/* - * [The "BSD license"] - * Copyright (c) 2012-2016 Terence Parr - * Copyright (c) 2012-2016 Sam Harwell - * Copyright (c) 2014 Eric Vergnaud - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** ANTLR tool checks output templates are compatible with tool code generation. - * For now, a simple string match used on x.y of x.y.z scheme. - * Must match Tool.VERSION during load to templates. - * - * REQUIRED. - */ - -pythonTypeInitMap ::= [ - "bool":"False", - "int":"0", - "float":"0.0", - "str":"", - default:"None" // anything other than a primitive type is an object -] - -// args must be , - -ParserFile(file, parser, namedActions, contextSuperClass) ::= << - -# encoding: utf-8 -from __future__ import print_function -from antlr4 import * -from io import StringIO -import sys - - - - ->> - -ListenerFile(file, header, namedActions) ::= << - -from antlr4 import * -
      - -# This class defines a complete listener for a parse tree produced by . -class Listener(ParseTreeListener): - - #. -def enter(self, ctx): - pass - -# Exit a parse tree produced by #. -def exit(self, ctx): - pass - -}; separator="\n"> - ->> - - -VisitorFile(file, header, namedActions) ::= << - -from antlr4 import * -
      - -# This class defines a complete generic visitor for a parse tree produced by . - -class Visitor(ParseTreeVisitor): - - #. -def visit(self, ctx): - return self.visitChildren(ctx) - -}; separator="\n"> - ->> - - -fileHeader(grammarFileName, ANTLRVersion) ::= << -# Generated from by ANTLR ->> - -Parser(parser, funcs, atn, sempredFuncs, superClass) ::= << - ->> - -Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= << - -if __name__ is not None and "." in __name__: - from . import -else: - from import - - - - -class ( Parser ): - - grammarFileName = "" - - atn = ATNDeserializer().deserialize(serializedATN()) - - decisionsToDFA = [ DFA(ds, i) for i, ds in enumerate(atn.decisionToState) ] - - sharedContextCache = PredictionContextCache() - - literalNames = [ }; null="u\"\\"", separator=", ", wrap, anchor> ] - - symbolicNames = [ }; null="u\"\\"", separator=", ", wrap, anchor> ] - - - = }; separator="\n", wrap, anchor> - - - ruleNames = [ "}; separator=", ", wrap, anchor> ] - - EOF = .EOF - - =}; separator="\n", wrap, anchor> - - - - - - - - - - - def sempred(self, localctx, ruleIndex, predIndex): - if self._predicates == None: - self._predicates = dict() -] = self._sempred}; separator="\n "> - pred = self._predicates.get(ruleIndex, None) - if pred is None: - raise Exception("No predicate with index:" + str(ruleIndex)) - else: - return pred(localctx, predIndex) - - - - - - ->> - -dumpActions(recog, argFuncs, actionFuncs, sempredFuncs) ::= << - -def action(self, localctx, ruleIndex, actionIndex): - if self._actions is None: - actions = dict() -] = self._action }; separator="\n"> - self._actions = actions - action = self._actions.get(ruleIndex, None) - if action is not None: - action(localctx, actionIndex) - else: - raise Exception("No registered action for:" + str(ruleIndex)) - - - - - -def sempred(self, localctx, ruleIndex, predIndex): - if self._predicates is None: - preds = dict() -] = self._sempred}; separator="\n"> - self._predicates = preds - pred = self._predicates.get(ruleIndex, None) - if pred is not None: - return pred(localctx, predIndex) - else: - raise Exception("No registered predicate for:" + str(ruleIndex)) - - - ->> - -parser_ctor(p) ::= << -def __init__(self, input, output=sys.stdout): - super(, self).__init__(input, output=output) - self.checkVersion("") - self._interp = ParserATNSimulator(self, self.atn, self.decisionsToDFA, self.sharedContextCache) - self._predicates = None - ->> - -/* This generates a private method since the actionIndex is generated, making an - * overriding implementation impossible to maintain. - */ -RuleActionFunction(r, actions) ::= << - -def _action(self, localctx , actionIndex): - - if actionIndex == : - - - elif actionIndex == : - - }; separator="\n"> ->> - -/* This generates a private method since the predIndex is generated, making an - * overriding implementation impossible to maintain. - */ -RuleSempredFunction(r, actions) ::= << -def _sempred(self, localctx, predIndex): - - if predIndex == : - return - - elif predIndex == : - return - }; separator="\n"> - ->> - -RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,finallyAction,postamble,exceptions) ::= << - - - -}; separator="\n"> - -def (self}>): - - localctx = .(self, self._ctx, self.state}>) - self.enterRule(localctx, , self.RULE_) - - - try: - - - - - - - except RecognitionException as re: - localctx.exception = re - self._errHandler.reportError(self, re) - self._errHandler.recover(self, re) - - finally: - - self.exitRule() - return localctx - ->> - -LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs, - namedActions,finallyAction,postamble) ::= -<< - - -}; separator="\n"> - -def (self, _p=0, }>): - _parentctx = self._ctx - _parentState = self.state - localctx = .(self, self._ctx, _parentState}>) - _prevctx = localctx - _startState = - self.enterRecursionRule(localctx, , self.RULE_, _p) - - - try: - - - - except RecognitionException as re: - localctx.exception = re - self._errHandler.reportError(self, re) - self._errHandler.recover(self, re) - finally: - - self.unrollRecursionContexts(_parentctx) - return localctx - ->> - -CodeBlockForOuterMostAlt(currentOuterMostAltCodeBlock, locals, preamble, ops) ::= << -localctx = .Context(self, localctx) -self.enterOuterAlt(localctx, ) - ->> - -CodeBlockForAlt(currentAltCodeBlock, locals, preamble, ops) ::= << - - - ->> - -LL1AltBlock(choice, preamble, alts, error) ::= << -self.state = -self._errHandler.sync(self) - = _input.LT(1) - -token = self._input.LA(1) - - - pass}; separator="\nel"> -else: - - ->> - -LL1OptionalBlock(choice, alts, error) ::= << -self.state = -self._errHandler.sync(self) -token = self._input.LA(1) - - - pass}; separator="\nel"> -else: - pass ->> - -LL1OptionalBlockSingleAlt(choice, expr, alts, preamble, error, followExpr) ::= << -self.state = -self._errHandler.sync(self) - -if : - - -) ) !> ->> - - -LL1StarBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << -self.state = -self._errHandler.sync(self) - -while : - - self.state = - self._errHandler.sync(self) - - ->> - -LL1PlusBlockSingleAlt(choice, loopExpr, alts, preamble, iteration) ::= << -self.state = -self._errHandler.sync(self) - -while True: - - self.state = - self._errHandler.sync(self) - - if not (): - break - ->> - -// LL(*) stuff - -AltBlock(choice, preamble, alts, error) ::= << -self.state = -self._errHandler.sync(self) - = _input.LT(1) - -la_ = self._interp.adaptivePredict(self._input,,self._ctx) -: - - pass -}; separator="\nel"> - ->> - -OptionalBlock(choice, alts, error) ::= << -self.state = -self._errHandler.sync(self) -la_ = self._interp.adaptivePredict(self._input,,self._ctx) -+1: - -}; separator="\nel"> - ->> - -StarBlock(choice, alts, sync, iteration) ::= << -self.state = -self._errHandler.sync(self) -_alt = self._interp.adaptivePredict(self._input,,self._ctx) -while _alt!= and _alt!=ATN.INVALID_ALT_NUMBER: - if _alt==1+1: - - - self.state = - self._errHandler.sync(self) - _alt = self._interp.adaptivePredict(self._input,,self._ctx) - ->> - -PlusBlock(choice, alts, error) ::= << -self.state = -self._errHandler.sync(self) -_alt = 1+1 -while _alt!= and _alt!=ATN.INVALID_ALT_NUMBER: - +1: - -}; separator="\nel"> - else: - - self.state = - self._errHandler.sync(self) - _alt = self._interp.adaptivePredict(self._input,,self._ctx) - ->> - -Sync(s) ::= "sync()" - -ThrowNoViableAlt(t) ::= "raise NoViableAltException(self)" - -TestSetInline(s) ::= << -}; separator=" or "> ->> - -// Java language spec 15.19 - shift operators mask operands rather than overflow to 0... need range test -testShiftInRange(shiftAmount) ::= << -(() & ~0x3f) == 0 ->> - -bitsetBitfieldComparison(s, bits) ::= <% -(})> and ((1 \<\< ) & ) != 0) -%> - -isZero ::= [ -"0":true, -default:false -] - -offsetShiftVar(shiftAmount, offset) ::= <% -( - ) -%> - -offsetShiftType(shiftAmount, offset) ::= <% -(. - ). -%> - -bitsetInlineComparison(s, bits) ::= <% -==}; separator=" or "> -%> - -cases(tokens) ::= << -if token in [}; separator=", ">]: ->> - -InvokeRule(r, argExprsChunks) ::= << -self.state = - = }>self.(,) ->> - -MatchToken(m) ::= << -self.state = - = }>self.match(.) ->> - -MatchSet(m, expr, capture) ::= "" - -MatchNotSet(m, expr, capture) ::= "" - -CommonSetStuff(m, expr, capture, invert) ::= << -self.state = - = }>self._input.LT(1) - -if \<= 0 or if not(): - = }> self._errHandler.recoverInline(self) -else: - self._errHandler.reportMatch(self) - self.consume() ->> - -Wildcard(w) ::= << -self.state = - = }>self.matchWildcard() ->> - -// ACTION STUFF - -Action(a, foo, chunks) ::= "" - -ArgAction(a, chunks) ::= "" - -SemPred(p, chunks, failChunks) ::= << -self.state = -if not : - from antlr4.error.Errors import FailedPredicateException - raise FailedPredicateException(self, , , ) ->> - -ExceptionClause(e, catchArg, catchAction) ::= << -except : - ->> - -// lexer actions are not associated with model objects - -LexerSkipCommand() ::= "skip()" -LexerMoreCommand() ::= "more()" -LexerPopModeCommand() ::= "popMode()" - -LexerTypeCommand(arg, grammar) ::= "_type = " -LexerChannelCommand(arg, grammar) ::= "_channel = " -LexerModeCommand(arg, grammar) ::= "_mode = " -LexerPushModeCommand(arg, grammar) ::= "pushMode()" - -ActionText(t) ::= "" -ActionTemplate(t) ::= "" -ArgRef(a) ::= "localctx." -LocalRef(a) ::= "localctx." -RetValueRef(a) ::= "localctx." -QRetValueRef(a) ::= ".." -/** How to translate $tokenLabel */ -TokenRef(t) ::= "." -LabelRef(t) ::= "." -ListLabelRef(t) ::= "." -SetAttr(s,rhsChunks) ::= ". = " - -TokenLabelType() ::= "" -InputSymbolType() ::= "" - -TokenPropertyRef_text(t) ::= "(None if . is None else ..text)" -TokenPropertyRef_type(t) ::= "(0 if . is None else ..type)" -TokenPropertyRef_line(t) ::= "(0 if . is None else ..line)" -TokenPropertyRef_pos(t) ::= "(0 if . is None else ..column)" -TokenPropertyRef_channel(t) ::= "(0 if . is None else ..channel)" -TokenPropertyRef_index(t) ::= "(0 if . is None else ..tokenIndex)" -TokenPropertyRef_int(t) ::= "(0 if . is None else int(..text))" - -RulePropertyRef_start(r) ::= "(None if . is None else ..start)" -RulePropertyRef_stop(r) ::= "(None if . is None else ..stop)" -RulePropertyRef_text(r) ::= "(None if . is None else self._input.getText(..start,..stop))" -RulePropertyRef_ctx(r) ::= "." -RulePropertyRef_parser(r) ::= "self" - -ThisRulePropertyRef_start(r) ::= "localctx.start" -ThisRulePropertyRef_stop(r) ::= "localctx.stop" -ThisRulePropertyRef_text(r) ::= "self._input.getText(localctx.start, self._input.LT(-1))" -ThisRulePropertyRef_ctx(r) ::= "localctx" -ThisRulePropertyRef_parser(r) ::= "self" - -NonLocalAttrRef(s) ::= "self.getInvokingContext()." -SetNonLocalAttr(s, rhsChunks) ::= "self.getInvokingContext(). = " - -AddToLabelList(a) ::= "..append()" - -TokenDecl(t) ::= "self. = None # " -TokenTypeDecl(t) ::= "self. = 0 # type" -TokenListDecl(t) ::= "self. = list() # of s" -RuleContextDecl(r) ::= "self. = None # " -RuleContextListDecl(rdecl) ::= "self. = list() # of s" - -ContextTokenGetterDecl(t) ::= << -def (self): - return self.getToken(., 0) ->> - -// should never be called -ContextTokenListGetterDecl(t) ::= << -def _list(self): - return self.getTokens(.) ->> - -ContextTokenListIndexedGetterDecl(t) ::= << -def (self, i=None): - if i is None: - return self.getTokens(.) - else: - return self.getToken(., i) ->> - -ContextRuleGetterDecl(r) ::= << -def (self): - return self.getTypedRuleContext(.,0) - ->> - -// should never be called -ContextRuleListGetterDecl(r) ::= << -def _list(self): - return self.getTypedRuleContexts(.) - ->> - -ContextRuleListIndexedGetterDecl(r) ::= << -def (self, i=None): - if i is None: - return self.getTypedRuleContexts(.) - else: - return self.getTypedRuleContext(.,i) - ->> - -LexerRuleContext() ::= "RuleContext" - -/** The rule context name is the rule followed by a suffix; e.g., - * r becomes rContext. - */ -RuleContextNameSuffix() ::= "Context" - -ImplicitTokenLabel(tokenName) ::= "_" -ImplicitRuleLabel(ruleName) ::= "_" -ImplicitSetLabel(id) ::= "_tset" -ListLabelName(label) ::= "