diff --git a/contributors.txt b/contributors.txt index 54ff875dc1..482ce2148f 100644 --- a/contributors.txt +++ b/contributors.txt @@ -215,3 +215,4 @@ YYYY/MM/DD, github id, Full name, email 2019/02/06, ralucado, Cristina Raluca Vijulie, ralucris.v[at]gmail[dot]com 2019/03/13, base698, Justin Thomas, justin.thomas1@gmail.com 2019/03/18, carlodri, Carlo Dri, carlo.dri@gmail.com +2019/04/06, dberlin, Daniel Berlin, dberlin@dberlin.org diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncremental.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncremental.java new file mode 100644 index 0000000000..ab3f85a731 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncremental.java @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2019 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ +package org.antlr.v4.test.runtime.java.api; + +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.xpath.XPath; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +class XPathExpectation { + public String XPathRule; + public String tree; + public Class classType; + public int epoch; + + public XPathExpectation(String XPathRule, String tree, Class classType, + int epoch) { + this.XPathRule = XPathRule; + this.tree = tree; + this.classType = classType; + this.epoch = epoch; + } +} + +class XPathExpectationBuilder { + private String XPathRule; + private String tree; + private Class classType; + private int epoch; + + public XPathExpectationBuilder setXPathRule(String XPathRule) { + this.XPathRule = XPathRule; + return this; + } + + public XPathExpectationBuilder setTree(String tree) { + this.tree = tree; + return this; + } + + public XPathExpectationBuilder 
setClassType(Class classType) { + this.classType = classType; + return this; + } + + public XPathExpectationBuilder setEpoch(int epoch) { + this.epoch = epoch; + return this; + } + + public XPathExpectation createXPathExpectation() { + return new XPathExpectation(XPathRule, tree, classType, epoch); + } +} + +public class TestIncremental { + + String SAMPLE_TEXT_1 = "foo 5555 foo 5555 foo"; + String EXPECTED_TREE_1 = "(program (identifier foo) (digits 5555) (identifier foo) (digits 5555) (identifier foo))"; + + String SAMPLE_TEXT_2 = "foo 5555 5555 foo"; + String EXPECTED_TREE_2 = + "(program (identifier foo) (digits 5555) (digits 5555) (identifier foo))"; + + String SAMPLE_TEXT_3 = "foo 5555 foo 5555 foo foo"; + String EXPECTED_TREE_3 = + "(program (identifier foo) (digits 5555) (identifier foo) (digits 5555) (identifier foo) (identifier foo))"; + + String JAVA_PROGRAM_1 = + "\npublic class HelloWorld {\n\n public static void main(String[] args) {\n // Prints \"Hello, World\" to the terminal window.\n System.out.println(\"Hello, World\");\n }\n\n}\n"; + String JAVA_EXPECTED_TREE_1 = + "(compilationUnit (typeDeclaration (classOrInterfaceDeclaration (classOrInterfaceModifiers (classOrInterfaceModifier public)) (classDeclaration (normalClassDeclaration class HelloWorld (classBody { (classBodyDeclaration (modifiers (modifier public) (modifier static)) (memberDecl void main (voidMethodDeclaratorRest (formalParameters ( (formalParameterDecls variableModifiers (type (classOrInterfaceType String) [ ]) (formalParameterDeclsRest (variableDeclaratorId args))) )) (methodBody (block { (blockStatement (statement (statementExpression (expression (expression (expression (expression (primary System)) . out) . 
println) ( (expressionList (expression (primary (literal \"Hello, World\")))) ))) ;)) }))))) }))))) )"; + String JAVA_PROGRAM_2 = + "\npublic class HelloWorld {\n\n public static void main(String[] args) {\n // Prints \"Hello, World\" to the terminal window.\n System.out.println(\"Hello\");\n }\n\n}\n"; + String JAVA_EXPECTED_TREE_2 = + "(compilationUnit (typeDeclaration (classOrInterfaceDeclaration (classOrInterfaceModifiers (classOrInterfaceModifier public)) (classDeclaration (normalClassDeclaration class HelloWorld (classBody { (classBodyDeclaration (modifiers (modifier public) (modifier static)) (memberDecl void main (voidMethodDeclaratorRest (formalParameters ( (formalParameterDecls variableModifiers (type (classOrInterfaceType String) [ ]) (formalParameterDeclsRest (variableDeclaratorId args))) )) (methodBody (block { (blockStatement (statement (statementExpression (expression (expression (expression (expression (primary System)) . out) . println) ( (expressionList (expression (primary (literal \"Hello\")))) ))) ;)) }))))) }))))) )"; + + /** + * This test verifies the behavior of the incremental parser as a non-incremental parser. + */ + @Test + public void testBasicIncrementalParse() { + TestIncrementalBasicLexer lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_1)); + TestIncrementalBasicParser parser = new TestIncrementalBasicParser(new IncrementalTokenStream(lexer)); + int startingEpoch = parser.getParserEpoch(); + + IncrementalParserRuleContext tree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_1, tree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, tree.epoch); + } + + /** + * This test reparses text and asserts that the context was reused. 
+ */ + @Test + public void testBasicIncrementalReparse() { + TestIncrementalBasicLexer lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_1)); + IncrementalTokenStream tokenStream = new IncrementalTokenStream(lexer); + TestIncrementalBasicParser parser = new TestIncrementalBasicParser(tokenStream); + int startingEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext firstTree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_1, firstTree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, firstTree.epoch); + + // Parse the same text with the old tree. + lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_1)); + tokenStream = new IncrementalTokenStream(lexer); + IncrementalParserData parserData = new IncrementalParserData(tokenStream, new ArrayList(), firstTree); + parser = new TestIncrementalBasicParser(tokenStream, parserData); + int secondEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext secondTree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_1, secondTree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, secondTree.epoch); + + } + + // Test that reparsing with a delete reuses data not deleted. + @Test + public void testBasicIncrementalDeleteWithWhitespace() { + TestIncrementalBasicLexer lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_1)); + IncrementalTokenStream tokenStream = new IncrementalTokenStream(lexer); + TestIncrementalBasicParser parser = new TestIncrementalBasicParser(tokenStream); + int startingEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext firstTree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_1, firstTree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, firstTree.epoch); + + // Delete a token and incrementally parse with the old tree. 
+ List oldTokens = tokenStream.getTokens(); + lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_2)); + tokenStream = new IncrementalTokenStream(lexer); + TokenChange firstChange = new TokenChangeBuilder() + .setChangeType(TokenChangeType.REMOVED) + .setOldToken((CommonToken) oldTokens.get(3)) + .createTokenChange(); + TokenChange secondChange = new TokenChangeBuilder() + .setChangeType(TokenChangeType.REMOVED) + .setOldToken((CommonToken) oldTokens.get(4)) + .createTokenChange(); + + ArrayList changes = new ArrayList(Arrays.asList(firstChange, secondChange)); + IncrementalParserData parserData = new IncrementalParserData(tokenStream, changes, firstTree); + parser = new TestIncrementalBasicParser(tokenStream, parserData); + int secondEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext secondTree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_2, secondTree.toStringTree(parser)); + // Should have been created by the second parser. + Assert.assertEquals(secondEpoch, secondTree.epoch); + // But all child nodes should have come from the old parse tree + for (ParseTree child : secondTree.children) { + IncrementalParserRuleContext incChild = (IncrementalParserRuleContext) child; + Assert.assertEquals(startingEpoch, incChild.epoch); + } + } + + // Test that reparsing with a add reuses data not added. + @Test + public void testBasicIncrementalAddWithWhitespace() { + TestIncrementalBasicLexer lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_1)); + IncrementalTokenStream tokenStream = new IncrementalTokenStream(lexer); + TestIncrementalBasicParser parser = new TestIncrementalBasicParser(tokenStream); + int startingEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext firstTree = parser.program(); + Assert.assertEquals(EXPECTED_TREE_1, firstTree.toStringTree(parser)); + // Should have been created by the first parser. 
+ Assert.assertEquals(startingEpoch, firstTree.epoch); + + // Add some tokens and incrementally reparse. + lexer = new TestIncrementalBasicLexer(new ANTLRInputStream(SAMPLE_TEXT_3)); + tokenStream = new IncrementalTokenStream(lexer); + tokenStream.fill(); + TokenChange firstChange = new TokenChangeBuilder() + .setChangeType(TokenChangeType.ADDED) + .setNewToken((CommonToken) tokenStream.get(9)) + .createTokenChange(); + TokenChange secondChange = new TokenChangeBuilder() + .setChangeType(TokenChangeType.ADDED) + .setNewToken((CommonToken) tokenStream.get(10)) + .createTokenChange(); + + ArrayList changes = new ArrayList(Arrays.asList(firstChange, secondChange)); + IncrementalParserData parserData = new IncrementalParserData(tokenStream, changes, firstTree); + parser = new TestIncrementalBasicParser(tokenStream, parserData); + int secondEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext secondTree = parser.program(); + + Assert.assertEquals(EXPECTED_TREE_3, secondTree.toStringTree(parser)); + // Should have been created by the second parser. + Assert.assertEquals(secondEpoch, secondTree.epoch); + // All but the last child nodes should have come from the old parse tree + for (int i = 0; i < secondTree.getChildCount() - 1; ++i) { + IncrementalParserRuleContext incChild = (IncrementalParserRuleContext) secondTree.getChild(i); + Assert.assertEquals(startingEpoch, incChild.epoch); + } + int lastChildIdx = secondTree.getChildCount() - 1; + IncrementalParserRuleContext incChild = (IncrementalParserRuleContext) secondTree.getChild(lastChildIdx); + Assert.assertEquals(secondEpoch, incChild.epoch); + } + + /** + * This test verifies the behavior of the incremental parser as a non-incremental parser. 
+ */ + @Test + public void testJavaIncrementalParse() { + TestIncrementalJavaLexer lexer = new TestIncrementalJavaLexer(new ANTLRInputStream(JAVA_PROGRAM_1)); + TestIncrementalJavaParser parser = new TestIncrementalJavaParser(new IncrementalTokenStream(lexer)); + int startingEpoch = parser.getParserEpoch(); + + IncrementalParserRuleContext tree = parser.compilationUnit(); + Assert.assertEquals(JAVA_EXPECTED_TREE_1, tree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, tree.epoch); + } + + /** + * This test reparses text and asserts that the context was reused. + */ + @Test + public void testJavaIncrementalReparse() { + TestIncrementalJavaLexer lexer = new TestIncrementalJavaLexer(new ANTLRInputStream(JAVA_PROGRAM_1)); + IncrementalTokenStream tokenStream = new IncrementalTokenStream(lexer); + TestIncrementalJavaParser parser = new TestIncrementalJavaParser(tokenStream); + int startingEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext firstTree = parser.compilationUnit(); + Assert.assertEquals(JAVA_EXPECTED_TREE_1, firstTree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, firstTree.epoch); + + // Parse the same text with the old tree. + lexer = new TestIncrementalJavaLexer(new ANTLRInputStream(JAVA_PROGRAM_1)); + tokenStream = new IncrementalTokenStream(lexer); + IncrementalParserData parserData = new IncrementalParserData(tokenStream, new ArrayList(), firstTree); + parser = new TestIncrementalJavaParser(tokenStream, parserData); + int secondEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext secondTree = parser.compilationUnit(); + Assert.assertEquals(JAVA_EXPECTED_TREE_1, secondTree.toStringTree(parser)); + // Should have been created by the first parser. 
+ Assert.assertEquals(startingEpoch, secondTree.epoch); + + } + + /** + * This test changes a token in the java program and asserts that the right contexts were reused. + */ + @Test + public void testJavaIncrementalReparseWithChange() { + TestIncrementalJavaLexer lexer = new TestIncrementalJavaLexer(new ANTLRInputStream(JAVA_PROGRAM_1)); + IncrementalTokenStream tokenStream = new IncrementalTokenStream(lexer); + TestIncrementalJavaParser parser = new TestIncrementalJavaParser(tokenStream); + int startingEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext firstTree = parser.compilationUnit(); + Assert.assertEquals(JAVA_EXPECTED_TREE_1, firstTree.toStringTree(parser)); + // Should have been created by the first parser. + Assert.assertEquals(startingEpoch, firstTree.epoch); + List oldTokens = tokenStream.getTokens(); + + // Parse slightly changed text + lexer = new TestIncrementalJavaLexer(new ANTLRInputStream(JAVA_PROGRAM_2)); + tokenStream = new IncrementalTokenStream(lexer); + tokenStream.fill(); + TokenChange firstChange = new TokenChangeBuilder() + .setChangeType(TokenChangeType.CHANGED) + .setOldToken((CommonToken) oldTokens.get(21)) + .setNewToken((CommonToken) tokenStream.get(21)) + .createTokenChange(); + + ArrayList changes = new ArrayList(Arrays.asList(firstChange)); + IncrementalParserData parserData = new IncrementalParserData(tokenStream, changes, firstTree); + parser = new TestIncrementalJavaParser(tokenStream, parserData); + int secondEpoch = parser.getParserEpoch(); + IncrementalParserRuleContext secondTree = parser.compilationUnit(); + Assert.assertEquals(JAVA_EXPECTED_TREE_2, secondTree.toStringTree(parser)); + + // Should have been created by the second parser. 
+ Assert.assertEquals(secondEpoch, secondTree.epoch); + // Verify we reused contexts that are reusable + ArrayList expectations = new ArrayList(Arrays.asList( + new XPathExpectationBuilder() + .setClassType(TestIncrementalJavaParser.ClassOrInterfaceModifiersContext.class) + .setTree("(classOrInterfaceModifiers (classOrInterfaceModifier public))") + .setXPathRule("//classOrInterfaceModifiers") + .setEpoch(startingEpoch) + .createXPathExpectation(), + new XPathExpectationBuilder() + .setClassType(TestIncrementalJavaParser.FormalParametersContext.class) + .setTree("(formalParameters ( (formalParameterDecls variableModifiers (type (classOrInterfaceType String) [ ]) (formalParameterDeclsRest (variableDeclaratorId args))) ))") + .setXPathRule("//formalParameters") + .setEpoch(startingEpoch) + .createXPathExpectation(), + new XPathExpectationBuilder() + .setClassType(TestIncrementalJavaParser.ModifiersContext.class) + .setTree("(modifiers (modifier public) (modifier static))") + .setXPathRule("//modifiers") + .setEpoch(startingEpoch) + .createXPathExpectation(), + new XPathExpectationBuilder() + .setClassType(TestIncrementalJavaParser.LiteralContext.class) + .setTree("(literal \"Hello\")") + .setXPathRule("//expression/primary/literal") + .setEpoch(secondEpoch) + .createXPathExpectation())); + /* This requires reusing individual recursion contexts */ + /* + { + class: ExpressionContext, + tree: "System.out.println", + xpathRule: "//statementExpression/expression/expression", + },*/ + verifyXPathExpectations(parser, secondTree, expectations); + } + + // Verify a set of xpath expectations against the parse tree + private void verifyXPathExpectations(IncrementalParser parser, + IncrementalParserRuleContext parseTree, + List expectations) { + for (XPathExpectation expectation : expectations) { + for (ParseTree XPathMatch : XPath.findAll(parseTree, expectation.XPathRule, parser)) { + Assert.assertTrue("Class of context is wrong", + 
expectation.classType.isInstance(XPathMatch)); + IncrementalParserRuleContext incCtx = (IncrementalParserRuleContext) XPathMatch; + Assert.assertEquals("Tree of context is wrong", incCtx.toStringTree(parser), expectation.tree); + Assert.assertEquals("Epoch of context is wrong", incCtx.epoch, expectation.epoch); + } + } + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalBasic.g4 b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalBasic.g4 new file mode 100644 index 0000000000..b1a2c10a75 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalBasic.g4 @@ -0,0 +1,12 @@ +grammar TestIncrementalBasic; +options { + incremental = true; +} +program: (identifier | digits)+; +identifier: IDENT; +digits: DIGITS; +// We deliberately put these on a hidden channel rather than skip - it helps +// make the cases weirder by making the parser's token indexes non-contiguous. +WS: [ \t\r\n\u000C]+ -> channel(HIDDEN); +IDENT: [A-Za-z]+; +DIGITS: [0-9]+; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalJava.g4 b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalJava.g4 new file mode 100644 index 0000000000..18af5422f5 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/api/TestIncrementalJava.g4 @@ -0,0 +1,1251 @@ +/* + [The "BSD licence"] + Copyright (c) 2007-2008 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/** A Java 1.5 grammar for ANTLR v3 derived from the spec + * + * This is a very close representation of the spec; the changes + * are comestic (remove left recursion) and also fixes (the spec + * isn't exactly perfect). I have run this on the 1.4.2 source + * and some nasty looking enums from 1.5, but have not really + * tested for 1.5 compatibility. 
+ * + * I built this with: java -Xmx100M org.antlr.Tool java.g + * and got two errors that are ok (for now): + * java.g:691:9: Decision can match input such as + * "'0'..'9'{'E', 'e'}{'+', '-'}'0'..'9'{'D', 'F', 'd', 'f'}" + * using multiple alternatives: 3, 4 + * As a result, alternative(s) 4 were disabled for that input + * java.g:734:35: Decision can match input such as "{'$', 'A'..'Z', + * '_', 'a'..'z', '\u00C0'..'\u00D6', '\u00D8'..'\u00F6', + * '\u00F8'..'\u1FFF', '\u3040'..'\u318F', '\u3300'..'\u337F', + * '\u3400'..'\u3D2D', '\u4E00'..'\u9FFF', '\uF900'..'\uFAFF'}" + * using multiple alternatives: 1, 2 + * As a result, alternative(s) 2 were disabled for that input + * + * You can turn enum on/off as a keyword :) + * + * Version 1.0 -- initial release July 5, 2006 (requires 3.0b2 or higher) + * + * Primary author: Terence Parr, July 2006 + * + * Version 1.0.1 -- corrections by Koen Vanderkimpen & Marko van Dooren, + * October 25, 2006; + * fixed normalInterfaceDeclaration: now uses typeParameters instead + * of typeParameter (according to JLS, 3rd edition) + * fixed castExpression: no longer allows expression next to type + * (according to semantics in JLS, in contrast with syntax in JLS) + * + * Version 1.0.2 -- Terence Parr, Nov 27, 2006 + * java spec I built this from had some bizarre for-loop control. + * Looked weird and so I looked elsewhere...Yep, it's messed up. + * simplified. + * + * Version 1.0.3 -- Chris Hogue, Feb 26, 2007 + * Factored out an annotationName rule and used it in the annotation rule. + * Not sure why, but typeName wasn't recognizing references to inner + * annotations (e.g. @InterfaceName.InnerAnnotation()) + * Factored out the elementValue section of an annotation reference. Created + * elementValuePair and elementValuePairs rules, then used them in the + * annotation rule. Allows it to recognize annotation references with + * multiple, comma separated attributes. 
+ * Updated elementValueArrayInitializer so that it allows multiple elements. + * (It was only allowing 0 or 1 element). + * Updated localVariableDeclaration to allow annotations. Interestingly the JLS + * doesn't appear to indicate this is legal, but it does work as of at least + * JDK 1.5.0_06. + * Moved the Identifier portion of annotationTypeElementRest to annotationMethodRest. + * Because annotationConstantRest already references variableDeclarator which + * has the Identifier portion in it, the parser would fail on constants in + * annotation definitions because it expected two identifiers. + * Added optional trailing ';' to the alternatives in annotationTypeElementRest. + * Wouldn't handle an inner interface that has a trailing ';'. + * Swapped the expression and type rule reference order in castExpression to + * make it check for genericized casts first. It was failing to recognize a + * statement like "Class TYPE = (Class)...;" because it was seeing + * 'Class'. + * Changed createdName to use typeArguments instead of nonWildcardTypeArguments. + * Changed the 'this' alternative in primary to allow 'identifierSuffix' rather than + * just 'arguments'. The case it couldn't handle was a call to an explicit + * generic method invocation (e.g. this.doSomething()). Using identifierSuffix + * may be overly aggressive--perhaps should create a more constrained thisSuffix rule? + * + * Version 1.0.4 -- Hiroaki Nakamura, May 3, 2007 + * + * Fixed formalParameterDecls, localVariableDeclaration, forInit, + * and forVarControl to use variableModifier* not 'final'? (annotation)? + * + * Version 1.0.5 -- Terence, June 21, 2007 + * --a[i].foo didn't work. Fixed unaryExpression + * + * Version 1.0.6 -- John Ridgway, March 17, 2008 + * Made "assert" a switchable keyword like "enum". + * Fixed compilationUnit to disallow "annotation importDeclaration ...". + * Changed "Identifier ('.' Identifier)*" to "qualifiedName" in more + * places. 
+ * Changed modifier* and/or variableModifier* to classOrInterfaceModifiers, + * modifiers or variableModifiers, as appropriate. + * Renamed "bound" to "typeBound" to better match language in the JLS. + * Added "memberDeclaration" which rewrites to methodDeclaration or + * fieldDeclaration and pulled type into memberDeclaration. So we parse + * type and then move on to decide whether we're dealing with a field + * or a method. + * Modified "constructorDeclaration" to use "constructorBody" instead of + * "methodBody". constructorBody starts with explicitConstructorInvocation, + * then goes on to blockStatement*. Pulling explicitConstructorInvocation + * out of expressions allowed me to simplify "primary". + * Changed variableDeclarator to simplify it. + * Changed type to use classOrInterfaceType, thus simplifying it; of course + * I then had to add classOrInterfaceType, but it is used in several + * places. + * Fixed annotations, old version allowed "@X(y,z)", which is illegal. + * Added optional comma to end of "elementValueArrayInitializer"; as per JLS. + * Changed annotationTypeElementRest to use normalClassDeclaration and + * normalInterfaceDeclaration rather than classDeclaration and + * interfaceDeclaration, thus getting rid of a couple of grammar ambiguities. + * Split localVariableDeclaration into localVariableDeclarationStatement + * (includes the terminating semi-colon) and localVariableDeclaration. + * This allowed me to use localVariableDeclaration in "forInit" clauses, + * simplifying them. + * Changed switchBlockStatementGroup to use multiple labels. This adds an + * ambiguity, but if one uses appropriately greedy parsing it yields the + * parse that is closest to the meaning of the switch statement. + * Renamed "forVarControl" to "enhancedForControl" -- JLS language. + * Added semantic predicates to test for shift operations rather than other + * things. Thus, for instance, the string "< <" will never be treated + * as a left-shift operator. 
+ * In "creator" we rule out "nonWildcardTypeArguments" on arrayCreation, + * which are illegal. + * Moved "nonWildcardTypeArguments into innerCreator. + * Removed 'super' superSuffix from explicitGenericInvocation, since that + * is only used in explicitConstructorInvocation at the beginning of a + * constructorBody. (This is part of the simplification of expressions + * mentioned earlier.) + * Simplified primary (got rid of those things that are only used in + * explicitConstructorInvocation). + * Lexer -- removed "Exponent?" from FloatingPointLiteral choice 4, since it + * led to an ambiguity. + * + * This grammar successfully parses every .java file in the JDK 1.5 source + * tree (excluding those whose file names include '-', which are not + * valid Java compilation units). + * + * June 26, 2008 + * + * conditionalExpression had wrong precedence x?y:z. + * + * February 26, 2011 + * added left-recursive expression rule + * + * Known remaining problems: + * "Letter" and "JavaIDDigit" are wrong. The actual specification of + * "Letter" should be "a character for which the method + * Character.isJavaIdentifierStart(int) returns true." A "Java + * letter-or-digit is a character for which the method + * Character.isJavaIdentifierPart(int) returns true." + */ +grammar TestIncrementalJava; +options { + incremental = true; +} + +// starting point for parsing a java file +/* The annotations are separated out to make parsing faster, but must be associated with + a packageDeclaration or a typeDeclaration (and not an empty one). */ +compilationUnit + : annotations + ( packageDeclaration importDeclaration* typeDeclaration* + | classOrInterfaceDeclaration typeDeclaration* + ) + EOF + | packageDeclaration? importDeclaration* typeDeclaration* + EOF + ; + +packageDeclaration + : 'package' qualifiedName ';' + ; + +importDeclaration + : 'import' 'static'? qualifiedName ('.' '*')? 
';' + ; + +typeDeclaration + : classOrInterfaceDeclaration + | ';' + ; + +classOrInterfaceDeclaration + : classOrInterfaceModifiers (classDeclaration | interfaceDeclaration) + ; + +classOrInterfaceModifiers + : classOrInterfaceModifier* + ; + +classOrInterfaceModifier + : annotation // class or interface + | ( 'public' // class or interface + | 'protected' // class or interface + | 'private' // class or interface + | 'abstract' // class or interface + | 'static' // class or interface + | 'final' // class only -- does not apply to interfaces + | 'strictfp' // class or interface + ) + ; + +modifiers + : modifier* + ; + +classDeclaration + : normalClassDeclaration + | enumDeclaration + ; + +normalClassDeclaration + : 'class' Identifier typeParameters? + ('extends' type)? + ('implements' typeList)? + classBody + ; + +typeParameters + : '<' typeParameter (',' typeParameter)* '>' + ; + +typeParameter + : Identifier ('extends' typeBound)? + ; + +typeBound + : type ('&' type)* + ; + +enumDeclaration + : ENUM Identifier ('implements' typeList)? enumBody + ; + +enumBody + : '{' enumConstants? ','? enumBodyDeclarations? '}' + ; + +enumConstants + : enumConstant (',' enumConstant)* + ; + +enumConstant + : annotations? Identifier arguments? classBody? + ; + +enumBodyDeclarations + : ';' (classBodyDeclaration)* + ; + +interfaceDeclaration + : normalInterfaceDeclaration + | annotationTypeDeclaration + ; + +normalInterfaceDeclaration + : 'interface' Identifier typeParameters? ('extends' typeList)? interfaceBody + ; + +typeList + : type (',' type)* + ; + +classBody + : '{' classBodyDeclaration* '}' + ; + +interfaceBody + : '{' interfaceBodyDeclaration* '}' + ; + +classBodyDeclaration + : ';' + | 'static'? 
block + | modifiers memberDecl + ; + +memberDecl + : genericMethodOrConstructorDecl + | memberDeclaration + | 'void' Identifier voidMethodDeclaratorRest + | Identifier constructorDeclaratorRest + | interfaceDeclaration + | classDeclaration + ; + +memberDeclaration + : type (methodDeclaration | fieldDeclaration) + ; + +genericMethodOrConstructorDecl + : typeParameters genericMethodOrConstructorRest + ; + +genericMethodOrConstructorRest + : (type | 'void') Identifier methodDeclaratorRest + | Identifier constructorDeclaratorRest + ; + +methodDeclaration + : Identifier methodDeclaratorRest + ; + +fieldDeclaration + : variableDeclarators ';' + ; + +interfaceBodyDeclaration + : modifiers interfaceMemberDecl + | ';' + ; + +interfaceMemberDecl + : interfaceMethodOrFieldDecl + | interfaceGenericMethodDecl + | 'void' Identifier voidInterfaceMethodDeclaratorRest + | interfaceDeclaration + | classDeclaration + ; + +interfaceMethodOrFieldDecl + : type Identifier interfaceMethodOrFieldRest + ; + +interfaceMethodOrFieldRest + : constantDeclaratorsRest ';' + | interfaceMethodDeclaratorRest + ; + +methodDeclaratorRest + : formalParameters ('[' ']')* + ('throws' qualifiedNameList)? + ( methodBody + | ';' + ) + ; + +voidMethodDeclaratorRest + : formalParameters ('throws' qualifiedNameList)? + ( methodBody + | ';' + ) + ; + +interfaceMethodDeclaratorRest + : formalParameters ('[' ']')* ('throws' qualifiedNameList)? ';' + ; + +interfaceGenericMethodDecl + : typeParameters (type | 'void') Identifier + interfaceMethodDeclaratorRest + ; + +voidInterfaceMethodDeclaratorRest + : formalParameters ('throws' qualifiedNameList)? ';' + ; + +constructorDeclaratorRest + : formalParameters ('throws' qualifiedNameList)? constructorBody + ; + +constantDeclarator + : Identifier constantDeclaratorRest + ; + +variableDeclarators + : variableDeclarator (',' variableDeclarator)* + ; + +variableDeclarator + : variableDeclaratorId ('=' variableInitializer)? 
+ ; + +constantDeclaratorsRest + : constantDeclaratorRest (',' constantDeclarator)* + ; + +constantDeclaratorRest + : ('[' ']')* '=' variableInitializer + ; + +variableDeclaratorId + : Identifier ('[' ']')* + ; + +variableInitializer + : arrayInitializer + | expression + ; + +arrayInitializer + : '{' (variableInitializer (',' variableInitializer)* (',')? )? '}' + ; + +modifier + : annotation + | ( 'public' + | 'protected' + | 'private' + | 'static' + | 'abstract' + | 'final' + | 'native' + | 'synchronized' + | 'transient' + | 'volatile' + | 'strictfp' + ) + ; + +packageOrTypeName + : qualifiedName + ; + +enumConstantName + : Identifier + ; + +typeName + : qualifiedName + ; + +type + : classOrInterfaceType ('[' ']')* + | primitiveType ('[' ']')* + ; + +classOrInterfaceType + : Identifier typeArguments? ('.' Identifier typeArguments? )* + ; + +primitiveType + : 'boolean' + | 'char' + | 'byte' + | 'short' + | 'int' + | 'long' + | 'float' + | 'double' + ; + +variableModifier + : 'final' + | annotation + ; + +typeArguments + : '<' typeArgument (',' typeArgument)* '>' + ; + +typeArgument + : type + | '?' (('extends' | 'super') type)? + ; + +qualifiedNameList + : qualifiedName (',' qualifiedName)* + ; + +formalParameters + : '(' formalParameterDecls? ')' + ; + +formalParameterDecls + : variableModifiers type formalParameterDeclsRest + ; + +formalParameterDeclsRest + : variableDeclaratorId (',' formalParameterDecls)? + | '...' variableDeclaratorId + ; + +methodBody + : block + ; + +constructorBody + : block + ; + +qualifiedName + : Identifier ('.' Identifier)* + ; + +literal + : IntegerLiteral + | FloatingPointLiteral + | CharacterLiteral + | StringLiteral + | BooleanLiteral + | 'null' + ; + +// ANNOTATIONS + +annotations + : annotation+ + ; + +annotation + : '@' annotationName ( '(' ( elementValuePairs | elementValue )? ')' )? + ; + +annotationName + : Identifier ('.' 
Identifier)* + ; + +elementValuePairs + : elementValuePair (',' elementValuePair)* + ; + +elementValuePair + : Identifier '=' elementValue + ; + +elementValue + : expression + | annotation + | elementValueArrayInitializer + ; + +elementValueArrayInitializer + : '{' (elementValue (',' elementValue)*)? (',')? '}' + ; + +annotationTypeDeclaration + : '@' 'interface' Identifier annotationTypeBody + ; + +annotationTypeBody + : '{' (annotationTypeElementDeclaration)* '}' + ; + +annotationTypeElementDeclaration + : modifiers annotationTypeElementRest + | ';' // this is not allowed by the grammar, but apparently allowed by the actual compiler + ; + +annotationTypeElementRest + : type annotationMethodOrConstantRest ';' + | normalClassDeclaration ';'? + | normalInterfaceDeclaration ';'? + | enumDeclaration ';'? + | annotationTypeDeclaration ';'? + ; + +annotationMethodOrConstantRest + : annotationMethodRest + | annotationConstantRest + ; + +annotationMethodRest + : Identifier '(' ')' defaultValue? + ; + +annotationConstantRest + : variableDeclarators + ; + +defaultValue + : 'default' elementValue + ; + +// STATEMENTS / BLOCKS + +block + : '{' blockStatement* '}' + ; + +blockStatement + : localVariableDeclarationStatement + | classOrInterfaceDeclaration + | statement + ; + +localVariableDeclarationStatement + : localVariableDeclaration ';' + ; + +localVariableDeclaration + : variableModifiers type variableDeclarators + ; + +variableModifiers + : variableModifier* + ; + +statement + : block + | ASSERT expression (':' expression)? ';' + | 'if' parExpression statement ('else' statement)? + | 'for' '(' forControl ')' statement + | 'while' parExpression statement + | 'do' statement 'while' parExpression ';' + | 'try' block (catches finallyBlock? | finallyBlock) + | 'try' resourceSpecification block catches? finallyBlock? + | 'switch' parExpression '{' switchBlockStatementGroups '}' + | 'synchronized' parExpression block + | 'return' expression? 
';' + | 'throw' expression ';' + | 'break' Identifier? ';' + | 'continue' Identifier? ';' + | ';' + | statementExpression ';' + | Identifier ':' statement + ; + +catches + : catchClause+ + ; + +catchClause + : 'catch' '(' variableModifiers catchType Identifier ')' block + ; + +catchType + : qualifiedName ('|' qualifiedName)* + ; + +finallyBlock + : 'finally' block + ; + +resourceSpecification + : '(' resources ';'? ')' + ; + +resources + : resource (';' resource)* + ; + +resource + : variableModifiers classOrInterfaceType variableDeclaratorId '=' expression + ; + +formalParameter + : variableModifiers type variableDeclaratorId + ; + +switchBlockStatementGroups + : (switchBlockStatementGroup)* + ; + +/* The change here (switchLabel -> switchLabel+) technically makes this grammar + ambiguous; but with appropriately greedy parsing it yields the most + appropriate AST, one in which each group, except possibly the last one, has + labels and statements. */ +switchBlockStatementGroup + : switchLabel+ blockStatement* + ; + +switchLabel + : 'case' constantExpression ':' + | 'case' enumConstantName ':' + | 'default' ':' + ; + +forControl + : enhancedForControl + | forInit? ';' expression? ';' forUpdate? + ; + +forInit + : localVariableDeclaration + | expressionList + ; + +enhancedForControl + : variableModifiers type Identifier ':' expression + ; + +forUpdate + : expressionList + ; + +// EXPRESSIONS + +parExpression + : '(' expression ')' + ; + +expressionList + : expression (',' expression)* + ; + +statementExpression + : expression + ; + +constantExpression + : expression + ; + +expression + : primary + | expression '.' Identifier + | expression '.' 'this' + | expression '.' 'new' nonWildcardTypeArguments? innerCreator + | expression '.' 'super' superSuffix + | expression '.' explicitGenericInvocation + | 'new' creator + | expression '[' expression ']' + | '(' type ')' expression + | expression ('++' | '--') + | expression '(' expressionList? 
')' + | ('+'|'-'|'++'|'--') expression + | ('~'|'!') expression + | expression ('*'|'/'|'%') expression + | expression ('+'|'-') expression + | expression ('<' '<' | '>' '>' '>' | '>' '>') expression + | expression ('<=' | '>=' | '>' | '<') expression + | expression 'instanceof' type + | expression ('==' | '!=') expression + | expression '&' expression + | expression '^' expression + | expression '|' expression + | expression '&&' expression + | expression '||' expression + | expression '?' expression ':' expression + | expression + ( '=' + | '+=' + | '-=' + | '*=' + | '/=' + | '&=' + | '|=' + | '^=' + | '>>=' + | '>>>=' + | '<<=' + | '%=' + ) + expression + ; + +primary + : '(' expression ')' + | 'this' + | 'super' + | literal + | Identifier + | type '.' 'class' + | 'void' '.' 'class' + | nonWildcardTypeArguments (explicitGenericInvocationSuffix | 'this' arguments) + ; + +creator + : nonWildcardTypeArguments createdName classCreatorRest + | createdName (arrayCreatorRest | classCreatorRest) + ; + +createdName + : Identifier typeArgumentsOrDiamond? ('.' Identifier typeArgumentsOrDiamond?)* + | primitiveType + ; + +innerCreator + : Identifier nonWildcardTypeArgumentsOrDiamond? classCreatorRest + ; + +arrayCreatorRest + : '[' + ( ']' ('[' ']')* arrayInitializer + | expression ']' ('[' expression ']')* ('[' ']')* + ) + ; + +classCreatorRest + : arguments classBody? + ; + +explicitGenericInvocation + : nonWildcardTypeArguments explicitGenericInvocationSuffix + ; + +nonWildcardTypeArguments + : '<' typeList '>' + ; + +typeArgumentsOrDiamond + : '<' '>' + | typeArguments + ; + +nonWildcardTypeArgumentsOrDiamond + : '<' '>' + | nonWildcardTypeArguments + ; + +superSuffix + : arguments + | '.' Identifier arguments? + ; + +explicitGenericInvocationSuffix + : 'super' superSuffix + | Identifier arguments + ; + +arguments + : '(' expressionList? 
')' + ; + +// LEXER + +// §3.9 Keywords + +ABSTRACT : 'abstract'; +ASSERT : 'assert'; +BOOLEAN : 'boolean'; +BREAK : 'break'; +BYTE : 'byte'; +CASE : 'case'; +CATCH : 'catch'; +CHAR : 'char'; +CLASS : 'class'; +CONST : 'const'; +CONTINUE : 'continue'; +DEFAULT : 'default'; +DO : 'do'; +DOUBLE : 'double'; +ELSE : 'else'; +ENUM : 'enum'; +EXTENDS : 'extends'; +FINAL : 'final'; +FINALLY : 'finally'; +FLOAT : 'float'; +FOR : 'for'; +IF : 'if'; +GOTO : 'goto'; +IMPLEMENTS : 'implements'; +IMPORT : 'import'; +INSTANCEOF : 'instanceof'; +INT : 'int'; +INTERFACE : 'interface'; +LONG : 'long'; +NATIVE : 'native'; +NEW : 'new'; +PACKAGE : 'package'; +PRIVATE : 'private'; +PROTECTED : 'protected'; +PUBLIC : 'public'; +RETURN : 'return'; +SHORT : 'short'; +STATIC : 'static'; +STRICTFP : 'strictfp'; +SUPER : 'super'; +SWITCH : 'switch'; +SYNCHRONIZED : 'synchronized'; +THIS : 'this'; +THROW : 'throw'; +THROWS : 'throws'; +TRANSIENT : 'transient'; +TRY : 'try'; +VOID : 'void'; +VOLATILE : 'volatile'; +WHILE : 'while'; + +// §3.10.1 Integer Literals + +IntegerLiteral + : DecimalIntegerLiteral + | HexIntegerLiteral + | OctalIntegerLiteral + | BinaryIntegerLiteral + ; + +fragment +DecimalIntegerLiteral + : DecimalNumeral IntegerTypeSuffix? + ; + +fragment +HexIntegerLiteral + : HexNumeral IntegerTypeSuffix? + ; + +fragment +OctalIntegerLiteral + : OctalNumeral IntegerTypeSuffix? + ; + +fragment +BinaryIntegerLiteral + : BinaryNumeral IntegerTypeSuffix? + ; + +fragment +IntegerTypeSuffix + : [lL] + ; + +fragment +DecimalNumeral + : '0' + | NonZeroDigit (Digits? | Underscores Digits) + ; + +fragment +Digits + : Digit (DigitsAndUnderscores? Digit)? 
+ ; + +fragment +Digit + : '0' + | NonZeroDigit + ; + +fragment +NonZeroDigit + : [1-9] + ; + +fragment +DigitsAndUnderscores + : DigitOrUnderscore+ + ; + +fragment +DigitOrUnderscore + : Digit + | '_' + ; + +fragment +Underscores + : '_'+ + ; + +fragment +HexNumeral + : '0' [xX] HexDigits + ; + +fragment +HexDigits + : HexDigit (HexDigitsAndUnderscores? HexDigit)? + ; + +fragment +HexDigit + : [0-9a-fA-F] + ; + +fragment +HexDigitsAndUnderscores + : HexDigitOrUnderscore+ + ; + +fragment +HexDigitOrUnderscore + : HexDigit + | '_' + ; + +fragment +OctalNumeral + : '0' Underscores? OctalDigits + ; + +fragment +OctalDigits + : OctalDigit (OctalDigitsAndUnderscores? OctalDigit)? + ; + +fragment +OctalDigit + : [0-7] + ; + +fragment +OctalDigitsAndUnderscores + : OctalDigitOrUnderscore+ + ; + +fragment +OctalDigitOrUnderscore + : OctalDigit + | '_' + ; + +fragment +BinaryNumeral + : '0' [bB] BinaryDigits + ; + +fragment +BinaryDigits + : BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)? + ; + +fragment +BinaryDigit + : [01] + ; + +fragment +BinaryDigitsAndUnderscores + : BinaryDigitOrUnderscore+ + ; + +fragment +BinaryDigitOrUnderscore + : BinaryDigit + | '_' + ; + +// §3.10.2 Floating-Point Literals + +FloatingPointLiteral + : DecimalFloatingPointLiteral + | HexadecimalFloatingPointLiteral + ; + +fragment +DecimalFloatingPointLiteral + : Digits '.' Digits? ExponentPart? FloatTypeSuffix? + | '.' Digits ExponentPart? FloatTypeSuffix? + | Digits ExponentPart FloatTypeSuffix? + | Digits FloatTypeSuffix + ; + +fragment +ExponentPart + : ExponentIndicator SignedInteger + ; + +fragment +ExponentIndicator + : [eE] + ; + +fragment +SignedInteger + : Sign? Digits + ; + +fragment +Sign + : [+-] + ; + +fragment +FloatTypeSuffix + : [fFdD] + ; + +fragment +HexadecimalFloatingPointLiteral + : HexSignificand BinaryExponent FloatTypeSuffix? + ; + +fragment +HexSignificand + : HexNumeral '.'? + | '0' [xX] HexDigits? '.' 
HexDigits + ; + +fragment +BinaryExponent + : BinaryExponentIndicator SignedInteger + ; + +fragment +BinaryExponentIndicator + : [pP] + ; + +// §3.10.3 Boolean Literals + +BooleanLiteral + : 'true' + | 'false' + ; + +// §3.10.4 Character Literals + +CharacterLiteral + : '\'' SingleCharacter '\'' + | '\'' EscapeSequence '\'' + ; + +fragment +SingleCharacter + : ~['\\] + ; + +// §3.10.5 String Literals + +StringLiteral + : '"' StringCharacters? '"' + ; + +fragment +StringCharacters + : StringCharacter+ + ; + +fragment +StringCharacter + : ~["\\] + | EscapeSequence + ; + +// §3.10.6 Escape Sequences for Character and String Literals + +fragment +EscapeSequence + : '\\' [btnfr"'\\] + | OctalEscape + ; + +fragment +OctalEscape + : '\\' OctalDigit + | '\\' OctalDigit OctalDigit + | '\\' ZeroToThree OctalDigit OctalDigit + ; + +fragment +ZeroToThree + : [0-3] + ; + +// §3.10.7 The Null Literal + +NullLiteral + : 'null' + ; + +// §3.11 Separators + +LPAREN : '('; +RPAREN : ')'; +LBRACE : '{'; +RBRACE : '}'; +LBRACK : '['; +RBRACK : ']'; +SEMI : ';'; +COMMA : ','; +DOT : '.'; + +// §3.12 Operators + +ASSIGN : '='; +GT : '>'; +LT : '<'; +BANG : '!'; +TILDE : '~'; +QUESTION : '?'; +COLON : ':'; +EQUAL : '=='; +LE : '<='; +GE : '>='; +NOTEQUAL : '!='; +AND : '&&'; +OR : '||'; +INC : '++'; +DEC : '--'; +ADD : '+'; +SUB : '-'; +MUL : '*'; +DIV : '/'; +BITAND : '&'; +BITOR : '|'; +CARET : '^'; +MOD : '%'; + +ADD_ASSIGN : '+='; +SUB_ASSIGN : '-='; +MUL_ASSIGN : '*='; +DIV_ASSIGN : '/='; +AND_ASSIGN : '&='; +OR_ASSIGN : '|='; +XOR_ASSIGN : '^='; +MOD_ASSIGN : '%='; +LSHIFT_ASSIGN : '<<='; +RSHIFT_ASSIGN : '>>='; +URSHIFT_ASSIGN : '>>>='; + +// §3.8 Identifiers (must appear after all keywords in the grammar) + +Identifier + : JavaLetter JavaLetterOrDigit* + ; + +fragment +JavaLetter + : [a-zA-Z$_] // these are the "java letters" below 0xFF + | // covers all characters above 0xFF which are not a surrogate + ~[\u0000-\u00FF\uD800-\uDBFF] + 
{Character.isJavaIdentifierStart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + +fragment +JavaLetterOrDigit + : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0xFF + | // covers all characters above 0xFF which are not a surrogate + ~[\u0000-\u00FF\uD800-\uDBFF] + {Character.isJavaIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + +// +// Additional symbols not defined in the lexical specification +// + +AT : '@'; +ELLIPSIS : '...'; + +// +// Whitespace and comments +// + +WS : [ \t\r\n\u000C]+ -> skip + ; + +COMMENT + : '/*' .*? '*/' -> skip + ; + +LINE_COMMENT + : '//' ~[\r\n]* -> skip + ; diff --git a/runtime/Java/src/org/antlr/v4/runtime/IncrementalParser.java b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParser.java new file mode 100644 index 0000000000..a4b09f4730 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParser.java @@ -0,0 +1,175 @@ +/* + * Copyright 2019 The ANTLR Project. All rights reserved. + * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information. + */ +package org.antlr.v4.runtime; + +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.TerminalNode; + +/** + * Incremental parser implementation + *

+ * There are only two differences between this parser and the underlying regular + * Parser - guard rules and min/max tracking + *

+ * The guard rule API is used in incremental mode to know when a rule context + * can be reused. It looks for token changes in the bounds of the rule. + *

+ * The min/max tracking is used to track how far ahead/behind the parser looked + * to correctly detect whether a token change can affect a parser rule in the future (IE when + * handed to the guard rule of the next parse) + * + * @note See IncrementalParsing.md for more details on the theory behind this. + * In order to make this easier in code generation, we use the parse + * listener interface to do most of our work. + */ +public abstract class IncrementalParser extends Parser implements ParseTreeListener { + // Current parser epoch. Incremented every time a new incremental parser is + // created. + private static int _PARSER_EPOCH = 0; + + private int parserEpoch; + private IncrementalParserData parseData; + + public IncrementalParser(IncrementalTokenStream input) { + this(input, null); + } + + public IncrementalParser(IncrementalTokenStream input, IncrementalParserData parseData) { + super(input); + this.parseData = parseData; + parserEpoch = IncrementalParser.incrementGlobalParserEpoch(); + // Register ourselves as our own parse listener. Life is weird. + addParseListener(this); + } + + protected static int incrementGlobalParserEpoch() { + return ++IncrementalParser._PARSER_EPOCH; + } + + public int getParserEpoch() { + return parserEpoch; + } + + // Push the current token data onto the min max stack for the stream. + private void pushCurrentTokenToMinMax() { + IncrementalTokenStream incStream = (IncrementalTokenStream) getInputStream(); + Token token = this._input.LT(1); + incStream.pushMinMax(token.getTokenIndex(), token.getTokenIndex()); + } + + // Pop the min max stack the stream is using and return the interval. + private Interval popCurrentMinMax(IncrementalParserRuleContext ctx) { + IncrementalTokenStream incStream = (IncrementalTokenStream) getInputStream(); + return incStream.popMinMax(); + } + + /** + * Guard a rule's previous context from being reused. + *

+	 * This routine will check whether a given parser rule needs to be rerun, or if
+	 * we already have context that can be reused for this parse.
+	 */
+	public IncrementalParserRuleContext guardRule(IncrementalParserRuleContext parentCtx, int state, int ruleIndex) {
+		// If we have no previous parse data, the rule needs to be run.
+		if (this.parseData == null) {
+			return null;
+		}
+		// See if we have seen this state before at this starting point.
+		IncrementalParserRuleContext existingCtx = this.parseData.tryGetContext(
+				parentCtx != null ? parentCtx.depth() + 1 : 1, getState(), ruleIndex,
+				this._input.LT(1).getTokenIndex());
+		// We haven't seen it, so we need to rerun this rule.
+		if (existingCtx == null) {
+			return null;
+		}
+		// We have seen it, see if it was affected by the parse
+		if (this.parseData.ruleAffectedByTokenChanges(existingCtx)) {
+			return null;
+		}
+		// Everything checked out, reuse the rule context - we add it to the
+		// parent context as enterRule would have.
+		if (this._ctx != null) {
+			IncrementalParserRuleContext parent = (IncrementalParserRuleContext) this._ctx;
+			// add current context to parent if we have a parent
+			if (parent != null) {
+				parent.addChild(existingCtx);
+			}
+		}
+		return existingCtx;
+	}
+
+	/**
+	 * Pop the min max stack the stream is using and union the interval into the
+	 * passed in context. Return the interval for the context.
+	 *
+	 * @param ctx Context to union interval into.
+	 */
+	private Interval popAndHandleMinMax(IncrementalParserRuleContext ctx) {
+		Interval interval = popCurrentMinMax(ctx);
+		ctx.setMinMaxTokenIndex(ctx.getMinMaxTokenIndex().union(interval));
+		// Returning interval is wrong because there may have been child
+		// intervals already merged into this ctx.
+		return ctx.getMinMaxTokenIndex();
+	}
+	/*
+	 * This is part of the regular Parser API. The super method must be called.
+	 */
+
+	/**
+	 * The new recursion context is an unfortunate edge case for us.
 It reparents
+	 * the relationship between the contexts, so we need to merge intervals here.
+	 */
+	@Override
+	public void pushNewRecursionContext(ParserRuleContext localctx, int state, int ruleIndex) {
+		// This context becomes the child
+		IncrementalParserRuleContext previous = (IncrementalParserRuleContext) this._ctx;
+		// The incoming context becomes the parent
+		IncrementalParserRuleContext incLocalCtx = (IncrementalParserRuleContext) localctx;
+		incLocalCtx.setMinMaxTokenIndex(incLocalCtx.getMinMaxTokenIndex().union(previous.getMinMaxTokenIndex()));
+		super.pushNewRecursionContext(localctx, state, ruleIndex);
+	}
+
+	/*
+	 * These two functions are part of the ParseTreeListener API. We do not need to
+	 * call super methods.
+	 */
+
+	@Override
+	public void enterEveryRule(ParserRuleContext ctx) {
+		// During rule entry, we push a new min/max token state.
+		pushCurrentTokenToMinMax();
+		IncrementalParserRuleContext incCtx = (IncrementalParserRuleContext) ctx;
+		incCtx.epoch = this.getParserEpoch();
+	}
+
+	@Override
+	public void exitEveryRule(ParserRuleContext ctx) {
+		// On exit, we need to merge the min max into the current context,
+		// and then merge the current context interval into our parent.
+
+		// First merge with the interval on the top of the stack.
+		IncrementalParserRuleContext incCtx = (IncrementalParserRuleContext) ctx;
+		Interval interval = popAndHandleMinMax(incCtx);
+
+		// Now merge with our parent interval.
+ if (incCtx.parent != null) { + IncrementalParserRuleContext parentIncCtx = (IncrementalParserRuleContext) incCtx.parent; + parentIncCtx.setMinMaxTokenIndex(parentIncCtx.getMinMaxTokenIndex().union(interval)); + } + } + + @Override + public void visitTerminal(TerminalNode node) { + + } + + @Override + public void visitErrorNode(ErrorNode node) { + + } + +} diff --git a/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserData.java b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserData.java new file mode 100644 index 0000000000..712b025be2 --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserData.java @@ -0,0 +1,393 @@ +/* + * Copyright 2019 The ANTLR Project. All rights reserved. + * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information. + */ +package org.antlr.v4.runtime; + +import java.util.*; + +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.Pair; +import org.antlr.v4.runtime.tree.ErrorNode; +import org.antlr.v4.runtime.tree.ParseTreeListener; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.antlr.v4.runtime.tree.TerminalNode; + +/* + Compare the intervals in a token offset range. This is used as a comparator for a TreeMap. + Note that equality is defined as containment to make searches for individual element ranges find + their containing range. + The ranges must otherwise be non-overlapping.. 
+*/ +class CompareTokenOffsetRanges implements Comparator { + + @Override + public int compare(Interval o1, Interval o2) { + if (o1.properlyContains(o2) || o2.properlyContains(o1)) { + return 0; + } + if (o1.b < o2.a) { + return -1; + } else if (o1.a > o2.b) { + return 1; + } + // Overlapping + return 0; + } +} + +class CompareTokensByStart implements Comparator { + @Override + public int compare(TokenChange tc1, TokenChange tc2) { + int startDifference = getStartingIndex(tc1) - getStartingIndex(tc2); + if (startDifference != 0) + return startDifference; + if (tc1.changeType == tc2.changeType) + return 0; + if (tc1.changeType == TokenChangeType.REMOVED) + return -1; + if (tc2.changeType == TokenChangeType.REMOVED) + return 1; + return tc1.changeType.compareTo(tc2.changeType); + + } + + private int getStartingIndex(TokenChange tc) { + if (tc.changeType == TokenChangeType.CHANGED) { + return tc.oldToken.getStartIndex(); + } else if (tc.changeType == TokenChangeType.ADDED) { + return tc.newToken.getStartIndex(); + } else { + return tc.oldToken.getStartIndex(); + } + } +} + +/** + * This class computes and stores data needed by the incremental parser. It is + * fairly unoptimized ATM to make things obvious and hopefully less broken. + *

+ * Please note: This class expects to own the parse tree passed in, and will + * modify it. Please clone them if you need them to remain unmodified for some + * reason. + */ +public class IncrementalParserData { + private IncrementalTokenStream tokenStream; + /* + * This mapping goes from a range to a token index offset to be applied for that + * range. It is used to figure out what token in the new stream to look at for a + * given token in the old stream. + * + * @note Equality is deliberately defined to be containment on this treemap in + * order to be able to find intervals in a given range. + */ + private TreeMap tokenOffsets; + + /* + * This is the set of tokens that changed in any way. We use a treeset so that + * we have the ability to get slices >= and <= certain numbers. + * For those runtimes without an equivalent, an array of numbers and a binary search + * that finds numbers within a range works just as well. + */ + private TreeSet changedTokens; + + /* This is the set of token changes that were specified by the user. */ + private List tokenChanges; + + /* + * This maps from depth, rule number, starting token index, to context we've + * seen before. + */ + private HashMap ruleStartMap = new HashMap<>(); + + public IncrementalParserData() { + } + + public IncrementalParserData(IncrementalTokenStream tokenStream, List tokenChanges, + IncrementalParserRuleContext oldTree) { + this.tokenChanges = tokenChanges; + if (tokenChanges != null) { + this.tokenStream = tokenStream; + computeTokenOffsetRanges(oldTree.getMaxTokenIndex()); + indexAndAdjustParseTree(oldTree); + } + } + + /** + * Take the set of token changes the user specified and convert it into two + * things: + * 1. A list of changed tokens + * 2. A set of ranges that say how tokenIndexes that appear in the old stream + * will have changed in the new stream. IE if a token was removed, the tokens + * after would appear at originalIndex - 1 in the new stream. 
+	 *
+	 * @param maxOldTokenIndex The maximum token index we may see in the old stream.
+	 * This is used as the upper bound of the last range.
+	 */
+	private void computeTokenOffsetRanges(int maxOldTokenIndex) {
+		if (this.tokenChanges == null || this.tokenChanges.size() == 0) {
+			return;
+		}
+		// Construct ranges for the token change offsets, and changed token intervals.
+		int indexOffset = 0;
+		ArrayList<Pair<Interval, Integer>> offsetRanges = new ArrayList<>();
+		this.changedTokens = new TreeSet<>();
+		Collections.sort(this.tokenChanges, new CompareTokensByStart());
+		for (TokenChange tokenChange : this.tokenChanges) {
+			int indexToPush = 0;
+			if (tokenChange.changeType == TokenChangeType.CHANGED) {
+				this.changedTokens.add(tokenChange.newToken.getTokenIndex());
+				// We only need to add this to changed tokens, it doesn't
+				// change token indexes.
+				continue;
+			}
+			// If a token changed, adjust the index the tokens after it
+			else if (tokenChange.changeType == TokenChangeType.REMOVED) {
+				this.changedTokens.add(tokenChange.oldToken.getTokenIndex() + indexOffset);
+
+				// The indexes move back one to account for the removed token.
+				indexOffset -= 1;
+				indexToPush = tokenChange.oldToken.getTokenIndex();
+			} else if (tokenChange.changeType == TokenChangeType.ADDED) {
+				this.changedTokens.add(tokenChange.newToken.getTokenIndex());
+				// The indexes move forward one to account for the added token.
+				indexOffset += 1;
+				indexToPush = tokenChange.newToken.getTokenIndex();
+			}
+			// End the previous range at the token index right before us
+			if (offsetRanges.size() != 0) {
+				int lastIdx = offsetRanges.size() - 1;
+				Pair<Interval, Integer> lastItem = offsetRanges.get(lastIdx);
+				offsetRanges.set(lastIdx, new Pair<>(Interval.of(lastItem.a.a, indexToPush - 1), lastItem.b));
+			}
+			// Push the range this change starts at, and what the effect is on
+			// the index.
+ if (indexOffset != 0) { + offsetRanges.add(new Pair<>(Interval.of(indexToPush, indexToPush), indexOffset)); + } + + } + // End the final range at length of the old token stream. That is the + // last possible thing we need to offset. + if (offsetRanges.size() != 0) { + int lastIdx = offsetRanges.size() - 1; + Pair lastItem = offsetRanges.get(lastIdx); + offsetRanges.set(lastIdx, new Pair<>(Interval.of(lastItem.a.a, maxOldTokenIndex), lastItem.b)); + } + + this.tokenOffsets = new TreeMap<>(new CompareTokenOffsetRanges()); + for (Pair tokenRange : offsetRanges) { + this.tokenOffsets.put(tokenRange.a, tokenRange.b); + } + } + + /** + * Determine whether a given parser rule is affected by changes to the token + * stream. + * + * @param ctx Current parser context coming into a rule. + */ + public boolean ruleAffectedByTokenChanges(IncrementalParserRuleContext ctx) { + // If we never got passed data, reparse everything. + if (this.tokenChanges == null) { + return true; + } + // However if there are no changes, the rule is fine + if (this.tokenChanges.size() == 0) { + return false; + } + + // See if any changed token exists in our upper, lower bounds. + int start = ctx.getMinTokenIndex(); + int end = ctx.getMaxTokenIndex(); + // See if the set has anything in the range we are asking about + boolean result = false; + // Get a view of all elements >= start token to start. + NavigableSet tailSet = this.changedTokens.tailSet(start, true); + // If *any* are in range, the rule is modified. + // Since the set is ordered, once we go past the end of the [start, end] range, + // we can stop. + for (Integer elem : tailSet) { + if (elem <= end) { + result = true; + break; + } else if (elem > end) { + break; + } + } + if (result) { + return true; + } + + return false; + } + + /** + * Try to see if we have existing context for this state, rule and token + * position that may be reused. + * + * @param depth Current rule depth + * @param state Parser state number - currently ignored. 
+ * @param ruleIndex Rule number + * @param tokenIndex Token index in the *new* token stream + */ + public IncrementalParserRuleContext tryGetContext(int depth, int state, int ruleIndex, int tokenIndex) { + return this.ruleStartMap.get(getKey(depth, state, ruleIndex, tokenIndex)); + } + + private String getKeyFromContext(IncrementalParserRuleContext ctx) { + return getKey(ctx.depth(), ctx.invokingState, ctx.getRuleIndex(), ctx.start.getTokenIndex()); + } + + private String getKey(int depth, int state, int rule, int tokenIndex) { + return String.format("%d,%d,%d", depth, rule, tokenIndex); + } + + /** + * Index a given parse tree and adjust the min/max ranges + * + * @param tree Parser context to adjust + */ + private void indexAndAdjustParseTree(IncrementalParserRuleContext tree) { + // This is a quick way of indexing the parse tree by start. We actually + // could walk the old parse tree as the parse proceeds. This is left as + // a future optimization. We also could just allow passing in + // constructed maps if this turns out to be slow. + tokenStream.fill(); + ParseTreeListener listener = new ParseTreeProcessor(); + ParseTreeWalker.DEFAULT.walk(listener, tree); + } + + + /** + * This class does two things: 1. Simple indexer to record the rule index and + * token index start of each rule. 2. Adjust the min max token ranges for any + * necessary offsets. + */ + private class ParseTreeProcessor implements ParseTreeListener { + + /** + * Given a token index in the old token stream, and an array of token changes, + * see what the new token index should be. + * + * @param oldStreamTokenIndex Token index in the old stream + * Return -1 if token does not need to change. 
+ */ + + int findAdjustedTokenIndex(int oldStreamTokenIndex) { + Integer result = tokenOffsets.get(Interval.of(oldStreamTokenIndex, oldStreamTokenIndex)); + if (result == null) + return -1; + return oldStreamTokenIndex + result; + } + + /** + * Given a token index the old stream, figure out the token it would be in the + * new stream and return it. If we don't need token adjustment, return nothing. + * + * @param oldTokenIndex Token index in old stream. + */ + private Token getAdjustedToken(int oldTokenIndex) { + int newTokenIndex = findAdjustedTokenIndex(oldTokenIndex); + if (newTokenIndex != -1) { + // We filled the tokenstream before the walk. + return tokenStream.get(newTokenIndex); + } + return null; + } + + /** + * Adjust the minimum/maximum token index that appears in a rule context. Like + * other functions, this simply converts the token indexes from how they appear + * in the old stream to how they would appear in the new stream. + * + * @param ctx Parser context to adjust. + */ + private void adjustMinMax(IncrementalParserRuleContext ctx) { + boolean changed = false; + int newMin = ctx.getMinTokenIndex(); + Token newToken = getAdjustedToken(newMin); + if (newToken != null) { + newMin = newToken.getTokenIndex(); + changed = true; + } + + int newMax = ctx.getMaxTokenIndex(); + newToken = getAdjustedToken(newMax); + + if (newToken != null) { + newMax = newToken.getTokenIndex(); + changed = true; + } + + if (changed) { + ctx.setMinMaxTokenIndex(Interval.of(newMin, newMax)); + } + } + + /** + * Adjust the start/stop token indexes of a rule to take into account position + * changes in the token stream. + * + * @param ctx The rule context to adjust the start/stop tokens of. 
+ */ + private void adjustStartStop(IncrementalParserRuleContext ctx) { + Token newToken = getAdjustedToken(ctx.start.getTokenIndex()); + if (newToken != null) { + ctx.start = newToken; + } + + if (ctx.stop != null) { + newToken = getAdjustedToken(ctx.stop.getTokenIndex()); + if (newToken != null) { + ctx.stop = newToken; + } + } + } + + @Override + public void visitTerminal(TerminalNode node) { + + } + + @Override + public void visitErrorNode(ErrorNode node) { + + } + + /** + * Process each rule context we see in top-down order, adjusting min- + * max and start-stop tokens, as well as adding the context to the + * rule start map. + * + * @param ctx Context to process + */ + @Override + public void enterEveryRule(ParserRuleContext ctx) { + IncrementalParserRuleContext incCtx = (IncrementalParserRuleContext) ctx; + // Don't bother adjusting rule contexts that we can't possibly + // reuse. Also don't touch contexts without an epoch. They must + // represent something the incremental parser never saw, + // since it sets epochs on all contexts it touches. + if (incCtx.epoch == -1) + return; + boolean mayNeedAdjustment = tokenOffsets != null && tokenOffsets.size() != 0; + if (mayNeedAdjustment) { + adjustMinMax(incCtx); + } + if (!ruleAffectedByTokenChanges(incCtx)) { + if (mayNeedAdjustment) { + adjustStartStop(incCtx); + } + String key = getKeyFromContext(incCtx); + ruleStartMap.put(key, incCtx); + } + } + + @Override + public void exitEveryRule(ParserRuleContext ctx) { + + } + } +} + diff --git a/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserRuleContext.java b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserRuleContext.java new file mode 100644 index 0000000000..a484b35aef --- /dev/null +++ b/runtime/Java/src/org/antlr/v4/runtime/IncrementalParserRuleContext.java @@ -0,0 +1,79 @@ +/* + * Copyright 2019 The ANTLR Project. All rights reserved. + * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information. 
+ */
+package org.antlr.v4.runtime;
+
+import org.antlr.v4.runtime.misc.Interval;
+
+public class IncrementalParserRuleContext extends ParserRuleContext {
+	/* Avoid having to recompute depth on every single depth call */
+	private int cachedDepth;
+	private RuleContext cachedParent;
+
+	// This is an epoch number that can be used to tell which pieces were
+	// modified during a given incremental parse. The incremental parser
+	// adds the current epoch number to all rule contexts it creates.
+	// The epoch number is incremented every time a new parser instance is created.
+	public int epoch = -1;
+
+	// The interval that stores the min/max token we touched during
+	// lookahead/lookbehind. Starts as an empty (min > max) sentinel interval.
+	private Interval _minMaxTokenIndex = Interval.of(Integer.MAX_VALUE, Integer.MIN_VALUE
+
+	);
+
+	/**
+	 * Get the minimum token index this rule touched.
+	 */
+	public int getMinTokenIndex() {
+		return _minMaxTokenIndex.a;
+	}
+
+	/**
+	 * Get the maximum token index this rule touched.
+	 */
+	public int getMaxTokenIndex() {
+		return _minMaxTokenIndex.b;
+	}
+
+	/**
+	 * Get the interval this rule touched.
+	 */
+	public Interval getMinMaxTokenIndex() {
+		return _minMaxTokenIndex;
+	}
+
+	public void setMinMaxTokenIndex(Interval index) {
+		_minMaxTokenIndex = index;
+	}
+
+	/**
+	 * Compute the depth of this context in the parse tree.
+	 *
+	 * @note The incremental parser uses a caching implementation.
+ * Note (review): the cache is keyed only on the immediate parent reference, so a cached depth can go stale if an ancestor is re-parented — TODO confirm intended.
+ */
+	@Override
+	public int depth() {
+		if (cachedParent != null && cachedParent == this.parent) { // cache hit: parent unchanged since last call
+			return cachedDepth;
+		}
+		int n = 1;
+		if (this.parent != null) {
+			int parentDepth = this.parent.depth();
+			this.cachedParent = this.parent;
+			this.cachedDepth = n = parentDepth + 1;
+		} else {
+			this.cachedDepth = n = 1;
+		}
+		return n;
+	}
+
+	public IncrementalParserRuleContext() {
+	}
+
+	public IncrementalParserRuleContext(IncrementalParserRuleContext parent, int invokingStateNumber) {
+		super(parent, invokingStateNumber);
+	}
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/IncrementalTokenStream.java b/runtime/Java/src/org/antlr/v4/runtime/IncrementalTokenStream.java
new file mode 100644
index 0000000000..4cd1500a45
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/IncrementalTokenStream.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2019 The ANTLR Project. All rights reserved.
+ * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
+ */
+package org.antlr.v4.runtime;
+
+import java.util.Stack;
+
+import org.antlr.v4.runtime.misc.Interval;
+
+public class IncrementalTokenStream extends CommonTokenStream {
+	/**
+	 * ANTLR looks at the same tokens a lot, and this avoids recalculating the
+	 * interval when the position and lookahead number don't move.
+	 */
+	private int lastP = -1;
+	private int lastK = -1;
+
+	/**
+	 * This tracks the min/max token index looked at since the value was reset. This
+	 * is used to track how far ahead the grammar looked, since it may be outside
+	 * the rule context's start/stop tokens. We need to maintain a stack of such
+	 * indices.
+	 */
+
+	private Stack minMaxStack = new Stack(); // NOTE(review): raw type — presumably Stack<Interval>; generics look lost in extraction, confirm against upstream
+
+	/**
+	 * Constructs a new {@link IncrementalTokenStream} using the specified token
+	 * source and the default token channel ({@link Token#DEFAULT_CHANNEL}).
+	 *
+	 * @param tokenSource The token source.
+	 */
+	public IncrementalTokenStream(TokenSource tokenSource) {
+		super(tokenSource);
+	}
+
+	/**
+	 * Constructs a new {@link IncrementalTokenStream} using the specified token
+	 * source and filtering tokens to the specified channel. Only tokens whose
+	 * {@link Token#getChannel} matches {@code channel} or have the
+	 * {@link Token#getType} equal to {@link Token#EOF} will be returned by the
+	 * token stream lookahead methods.
+	 *
+	 * @param tokenSource The token source.
+	 * @param channel The channel to use for filtering tokens.
+	 */
+	public IncrementalTokenStream(TokenSource tokenSource, int channel) {
+		this(tokenSource);
+		this.channel = channel;
+	}
+
+	/**
+	 * Push a new minimum/maximum token state.
+	 *
+	 * @param min Minimum token index
+	 * @param max Maximum token index
+	 */
+	public void pushMinMax(int min, int max) {
+		minMaxStack.push(Interval.of(min, max));
+	}
+
+	/**
+	 * Pop the current minimum/maximum token state and return it. Throws IndexOutOfBoundsException if no state has been pushed.
+	 */
+	public Interval popMinMax() {
+		if (minMaxStack.size() == 0) {
+			throw new IndexOutOfBoundsException("Can't pop the min max state when there are 0 states");
+		}
+		return minMaxStack.pop();
+	}
+
+	/**
+	 * This is an override of the base LT function that tracks the minimum/maximum
+	 * token index looked at.
+	 */
+	@Override
+	public Token LT(int k) {
+		Token result = super.LT(k);
+		// Adjust the top of the minimum maximum stack if the position/lookahead amount
+		// changed.
+		if (minMaxStack.size() != 0 && (lastP != p || lastK != k)) {
+			int lastIdx = minMaxStack.size() - 1;
+			Interval stackItem = minMaxStack.get(lastIdx);
+			minMaxStack.set(lastIdx, stackItem.union(Interval.of(result.getTokenIndex(), result.getTokenIndex())));
+
+			lastP = p; // p is the stream's current index, inherited from BufferedTokenStream
+			lastK = k;
+		}
+		return result;
+	}
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenChange.java b/runtime/Java/src/org/antlr/v4/runtime/TokenChange.java
new file mode 100644
index 0000000000..e03db3fdbf
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenChange.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2019 The ANTLR Project. All rights reserved.
+ * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
+ */
+package org.antlr.v4.runtime;
+
+/**
+ * Represents a change to a single token.
+ *
+ * For change type ADDED, newToken is required.
+ *
+ * For change type REMOVED, oldToken is required.
+ *
+ * For change type CHANGED, oldToken and newToken are required.
+ *
+ * Token changes may *not* overlap. You also need to account for hidden tokens
+ * (but not *skipped* ones).
+ */
+public class TokenChange {
+	TokenChangeType changeType;
+	CommonToken oldToken;
+	CommonToken newToken;
+	TokenChange(TokenChangeType changeType,CommonToken oldToken, CommonToken newToken ) {
+		this.changeType = changeType;
+		this.oldToken = oldToken;
+		this.newToken = newToken;
+	}
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenChangeBuilder.java b/runtime/Java/src/org/antlr/v4/runtime/TokenChangeBuilder.java
new file mode 100644
index 0000000000..192bd16a3f
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenChangeBuilder.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2019 The ANTLR Project. All rights reserved.
+ * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
+ */
+package org.antlr.v4.runtime;
+
+/**
+ * Simple builder class for TokenChange. Performs no validation of the per-change-type field requirements documented on TokenChange.
+ */
+public class TokenChangeBuilder {
+	private TokenChangeType changeType;
+	private CommonToken oldToken;
+	private CommonToken newToken;
+
+	public TokenChangeBuilder setChangeType(TokenChangeType changeType) {
+		this.changeType = changeType;
+		return this;
+	}
+
+	public TokenChangeBuilder setOldToken(CommonToken oldToken) {
+		this.oldToken = oldToken;
+		return this;
+	}
+
+	public TokenChangeBuilder setNewToken(CommonToken newToken) {
+		this.newToken = newToken;
+		return this;
+	}
+
+	public TokenChange createTokenChange() {
+		return new TokenChange(changeType, oldToken, newToken);
+	}
+}
diff --git a/runtime/Java/src/org/antlr/v4/runtime/TokenChangeType.java b/runtime/Java/src/org/antlr/v4/runtime/TokenChangeType.java
new file mode 100644
index 0000000000..d68005ca1a
--- /dev/null
+++ b/runtime/Java/src/org/antlr/v4/runtime/TokenChangeType.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2019 The ANTLR Project. All rights reserved.
+ * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
+ */
+package org.antlr.v4.runtime;
+
+/**
+ * Definition of a token change:
+ *
+ * ADDED = A new token that did not exist before
+ *
+ * CHANGED = A token that was in the stream before but changed in some way.
+ *
+ * REMOVED = A token that no longer exists in the stream.
+ * + */ +public enum TokenChangeType { + ADDED, CHANGED, REMOVED, +}; \ No newline at end of file diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg index 1683a18f8b..07e8559731 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Java/Java.stg @@ -223,8 +223,13 @@ Parser(parser, funcs, atn, sempredFuncs, superClass) ::= << >> Parser_(parser, funcs, atn, sempredFuncs, ctor, superClass) ::= << + @SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"}) + +public class extends { + public class extends { + static { RuntimeMetaData.checkVersion("", RuntimeMetaData.VERSION); } protected static final DFA[] _decisionToDFA; @@ -352,10 +357,21 @@ case : >> parser_ctor(p) ::= << + +public (IncrementalTokenStream input) { + this(input, null); +} +public (IncrementalTokenStream input, IncrementalParserData data) +{ + super(input, data); + _interp = new ParserATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache); +} + public (TokenStream input) { super(input); _interp = new ParserATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache); } + >> /* This generates a private method since the actionIndex is generated, making an @@ -392,6 +408,15 @@ RuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs,namedActions,fina }; separator="\n"> }>public final () throws RecognitionException { + + // Check whether we need to execute this rule. + guardResult = ()guardRule((IncrementalParserRuleContext)_ctx, getState(), RULE_); + // If we found an existing context that is valid, return it. 
+ if (guardResult != null) { + this._input.seek(guardResult.stop.getTokenIndex() + 1); + return guardResult; + } + _localctx = new (_ctx, getState()}>); enterRule(_localctx, , RULE_); @@ -435,6 +460,15 @@ LeftRecursiveRuleFunction(currentRule,args,code,locals,ruleCtx,altLabelCtxs, private (int _p}>) throws RecognitionException { ParserRuleContext _parentctx = _ctx; int _parentState = getState(); + + // Check whether we need to execute this rule. + guardResult = ()guardRule((IncrementalParserRuleContext)_ctx, getState(), RULE_); + // If we found an existing context that is valid, return it. + if (guardResult != null) { + this._input.seek(guardResult.stop.getTokenIndex() + 1); + return guardResult; + } + _localctx = new (_ctx, _parentState}>); _prevctx = _localctx; int _startState = ; @@ -782,14 +816,31 @@ CaptureNextTokenType(d) ::= " = _input.LA(1);" StructDecl(struct,ctorAttrs,attrs,getters,dispatchMethods,interfaces,extensionMembers) ::= << + +public static class extends IncrementalParserRuleContext implements { + public static class extends ParserRuleContext implements { + ;}; separator="\n"> }; separator="\n"> - public (ParserRuleContext parent, int invokingState) { super(parent, invokingState); } + + + public (ParserRuleContext parent, int invokingState) { super((IncrementalParserRuleContext) parent, invokingState); } + + public (ParserRuleContext parent, int invokingState) { super(parent, invokingState); } + + + public (ParserRuleContext parent, int invokingState}>) { - super(parent, invokingState); + super((IncrementalParserRuleContext)parent, invokingState); = ;}; separator="\n"> } + + public (ParserRuleContext parent, int invokingState}>) { + super(parent, invokingState); + = ;}; separator="\n"> + } + @Override public int getRuleIndex() { return RULE_; } public () { } diff --git a/tool/src/org/antlr/v4/codegen/model/ParserFile.java b/tool/src/org/antlr/v4/codegen/model/ParserFile.java index 4cd6b260c4..23411a747b 100644 --- 
a/tool/src/org/antlr/v4/codegen/model/ParserFile.java +++ b/tool/src/org/antlr/v4/codegen/model/ParserFile.java @@ -22,6 +22,7 @@ public class ParserFile extends OutputFile { @ModelElement public Parser parser; @ModelElement public Map namedActions; @ModelElement public ActionChunk contextSuperClass; + public boolean incremental; public String grammarName; public ParserFile(OutputModelFactory factory, String fileName) { @@ -34,6 +35,7 @@ public ParserFile(OutputModelFactory factory, String fileName) { genListener = g.tool.gen_listener; genVisitor = g.tool.gen_visitor; grammarName = g.name; + incremental = Boolean.parseBoolean(g.getOptionString("incremental")); if (g.getOptionString("contextSuperClass") != null) { contextSuperClass = new ActionText(null, g.getOptionString("contextSuperClass")); diff --git a/tool/src/org/antlr/v4/gui/TestRig.java b/tool/src/org/antlr/v4/gui/TestRig.java index a466363168..0cc02bee5e 100644 --- a/tool/src/org/antlr/v4/gui/TestRig.java +++ b/tool/src/org/antlr/v4/gui/TestRig.java @@ -11,6 +11,7 @@ import org.antlr.v4.runtime.CommonToken; import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.DiagnosticErrorListener; +import org.antlr.v4.runtime.IncrementalTokenStream; import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Parser; import org.antlr.v4.runtime.ParserRuleContext; @@ -148,8 +149,8 @@ public void process() throws Exception { if ( !startRuleName.equals(LEXER_START_RULE_NAME) ) { String parserName = grammarName+"Parser"; parserClass = cl.loadClass(parserName).asSubclass(Parser.class); - Constructor parserCtor = parserClass.getConstructor(TokenStream.class); - parser = parserCtor.newInstance((TokenStream)null); + Constructor parserCtor = parserClass.getConstructor(IncrementalTokenStream.class); + parser = parserCtor.newInstance((IncrementalTokenStream)null); } Charset charset = ( encoding == null ? 
Charset.defaultCharset () : Charset.forName(encoding) ); @@ -169,7 +170,7 @@ public void process() throws Exception { protected void process(Lexer lexer, Class parserClass, Parser parser, CharStream input) throws IOException, IllegalAccessException, InvocationTargetException, PrintException { lexer.setInputStream(input); - CommonTokenStream tokens = new CommonTokenStream(lexer); + IncrementalTokenStream tokens = new IncrementalTokenStream(lexer); tokens.fill(); diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index fc98fcf4a7..e8abca130e 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -83,6 +83,7 @@ public class Grammar implements AttributeResolver { parserOptions.add("language"); parserOptions.add("accessLevel"); parserOptions.add("exportMacro"); + parserOptions.add("incremental"); } public static final Set lexerOptions = parserOptions; @@ -120,6 +121,7 @@ public class Grammar implements AttributeResolver { doNotCopyOptionsToLexer.add("superClass"); doNotCopyOptionsToLexer.add("TokenLabelType"); doNotCopyOptionsToLexer.add("tokenVocab"); + doNotCopyOptionsToLexer.add("incremental"); } public static final Map grammarAndLabelRefTypeToScope =