diff --git a/.gitignore b/.gitignore index 81e058979e..00cfcd826f 100644 --- a/.gitignore +++ b/.gitignore @@ -100,3 +100,4 @@ javac-services.0.log.lck # Don't ignore python tests !runtime/Python3/test/ +Antlr4.sln diff --git a/runtime/CSharp/Atn/LL1Analyzer.cs b/runtime/CSharp/Atn/LL1Analyzer.cs index 06bcebb339..e679334128 100644 --- a/runtime/CSharp/Atn/LL1Analyzer.cs +++ b/runtime/CSharp/Atn/LL1Analyzer.cs @@ -3,7 +3,6 @@ * can be found in the LICENSE.txt file in the project root. */ using System.Collections.Generic; -using Antlr4.Runtime.Atn; using Antlr4.Runtime.Misc; using Antlr4.Runtime.Sharpen; @@ -11,12 +10,9 @@ namespace Antlr4.Runtime.Atn { public class LL1Analyzer { - /// - /// Special value added to the lookahead sets to indicate that we hit - /// a predicate during analysis if - /// seeThruPreds==false - /// . - /// + /** Special value added to the lookahead sets to indicate that we hit + * a predicate during analysis if {@code seeThruPreds==false}. + */ public const int HitPred = TokenConstants.InvalidType; [NotNull] @@ -27,25 +23,16 @@ public LL1Analyzer(ATN atn) this.atn = atn; } - /// - /// Calculates the SLL(1) expected lookahead set for each outgoing transition - /// of an - /// - /// . The returned array has one element for each - /// outgoing transition in - /// - /// . If the closure from transition - /// i leads to a semantic predicate before matching a symbol, the - /// element at index i of the result will be - /// - /// . - /// - /// the ATN state - /// - /// the expected symbols for each outgoing transition of - /// - /// . - /// + /** + * Calculates the SLL(1) expected lookahead set for each outgoing transition + * of an {@link ATNState}. The returned array has one element for each + * outgoing transition in {@code s}. If the closure from transition + * i leads to a semantic predicate before matching a symbol, the + * element at index i of the result will be {@code null}. 
+ * + * @param s the ATN state + * @return the expected symbols for each outgoing transition of {@code s}. + */ [return: Nullable] public virtual IntervalSet[] GetDecisionLookahead(ATNState s) { @@ -61,7 +48,7 @@ public virtual IntervalSet[] GetDecisionLookahead(ATNState s) HashSet lookBusy = new HashSet(); bool seeThruPreds = false; // fail to get lookahead upon pred - Look(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false); + Look_(s.Transition(alt).target, null, PredictionContext.EMPTY, look[alt], lookBusy, new BitSet(), seeThruPreds, false); // Wipe out lookahead for this alternative if we found nothing // or we had a predicate when we !seeThruPreds if (look[alt].Count == 0 || look[alt].Contains(HitPred)) @@ -72,190 +59,88 @@ public virtual IntervalSet[] GetDecisionLookahead(ATNState s) return look; } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

If - /// - /// is - /// - /// and the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. - /// If - /// - /// is not - /// - /// and the end of the outermost rule is - /// reached, - /// - /// is added to the result set.

- ///
- /// the ATN state - /// - /// the complete parser context, or - /// - /// if the context - /// should be ignored - /// - /// - /// The set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - /// + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and the end of the rule containing + * {@code s} is reached, {@link TokenConstants#EPSILON} is added to the result set. + * If {@code ctx} is not {@code null} and the end of the outermost rule is + * reached, {@link TokenConstants#EOF} is added to the result set.

+ * + * @param s the ATN state + * @param ctx the complete parser context, or {@code null} if the context + * should be ignored + * + * @return The set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + */ [return: NotNull] public virtual IntervalSet Look(ATNState s, RuleContext ctx) { return Look(s, null, ctx); } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

If - /// - /// is - /// - /// and the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. - /// If - /// - /// is not - /// PredictionContext#EMPTY_LOCAL - /// and the end of the outermost rule is - /// reached, - /// - /// is added to the result set.

- ///
- /// the ATN state - /// - /// the ATN state to stop at. This can be a - /// - /// to detect epsilon paths through a closure. - /// - /// - /// the complete parser context, or - /// - /// if the context - /// should be ignored - /// - /// - /// The set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - /// + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and the end of the rule containing + * {@code s} is reached, {@link TokenConstants#EPSILON} is added to the result set. + * If {@code ctx} is not {@code null} and the end of the outermost rule is + * reached, {@link TokenConstants#EOF} is added to the result set.

+ * + * @param s the ATN state + * @param stopState the ATN state to stop at. This can be a + * {@link BlockEndState} to detect epsilon paths through a closure. + * @param ctx the complete parser context, or {@code null} if the context + * should be ignored + * + * @return The set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + */ [return: NotNull] public virtual IntervalSet Look(ATNState s, ATNState stopState, RuleContext ctx) { IntervalSet r = new IntervalSet(); bool seeThruPreds = true; - PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null; - Look(s, stopState, lookContext, r, new HashSet(), new BitSet(), seeThruPreds, true); + PredictionContext lookContext = ctx != null ? PredictionContext.FromRuleContext(s.atn, ctx) : null; + Look_(s, stopState, lookContext, r, new HashSet(), new BitSet(), seeThruPreds, true); return r; } - /// - /// Compute set of tokens that can follow - /// - /// in the ATN in the - /// specified - /// - /// . - ///

- /// If - /// - /// is - /// - /// and - /// - /// or the end of the rule containing - /// - /// is reached, - /// - /// is added to the result set. If - /// - /// is not - /// - /// and - /// - /// is - /// - /// and - /// - /// or the end of the outermost rule is reached, - /// - /// is added to the result set. - ///

- /// the ATN state. - /// - /// the ATN state to stop at. This can be a - /// - /// to detect epsilon paths through a closure. - /// - /// - /// The outer context, or - /// - /// if - /// the outer context should not be used. - /// - /// The result lookahead set. - /// - /// A set used for preventing epsilon closures in the ATN - /// from causing a stack overflow. Outside code should pass - /// new HashSet<ATNConfig> - /// for this argument. - /// - /// - /// A set used for preventing left recursion in the - /// ATN from causing a stack overflow. Outside code should pass - /// new BitSet() - /// for this argument. - /// - /// - /// - /// - /// to true semantic predicates as - /// implicitly - /// - /// and "see through them", otherwise - /// - /// to treat semantic predicates as opaque and add - /// - /// to the - /// result if one is encountered. - /// - /// - /// Add - /// - /// to the result if the end of the - /// outermost context is reached. This parameter has no effect if - /// - /// is - /// - /// . - /// - protected internal virtual void Look(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF) + /** + * Compute set of tokens that can follow {@code s} in the ATN in the + * specified {@code ctx}. + * + *

If {@code ctx} is {@code null} and {@code stopState} or the end of the + * rule containing {@code s} is reached, {@link TokenConstants#EPSILON} is added to + * the result set. If {@code ctx} is not {@code null} and {@code addEOF} is + * {@code true} and {@code stopState} or the end of the outermost rule is + * reached, {@link TokenConstants#EOF} is added to the result set.

+ * + * @param s the ATN state. + * @param stopState the ATN state to stop at. This can be a + * {@link BlockEndState} to detect epsilon paths through a closure. + * @param ctx The outer context, or {@code null} if the outer context should + * not be used. + * @param look The result lookahead set. + * @param lookBusy A set used for preventing epsilon closures in the ATN + * from causing a stack overflow. Outside code should pass + * {@code new HashSet<ATNConfig>} for this argument. + * @param calledRuleStack A set used for preventing left recursion in the + * ATN from causing a stack overflow. Outside code should pass + * {@code new BitSet()} for this argument. + * @param seeThruPreds {@code true} to treat semantic predicates as + * implicitly {@code true} and "see through them", otherwise {@code false} + * to treat semantic predicates as opaque and add {@link #HitPred} to the + * result if one is encountered. + * @param addEOF Add {@link Token#EOF} to the result if the end of the + * outermost context is reached. This parameter has no effect if {@code ctx} + * is {@code null}. 
+ */ + protected internal virtual void Look_(ATNState s, ATNState stopState, PredictionContext ctx, IntervalSet look, HashSet lookBusy, BitSet calledRuleStack, bool seeThruPreds, bool addEOF) { - // System.out.println("_LOOK("+s.stateNumber+", ctx="+ctx); ATNConfig c = new ATNConfig(s, 0, ctx); if (!lookBusy.Add(c)) { @@ -268,50 +153,51 @@ protected internal virtual void Look(ATNState s, ATNState stopState, PredictionC look.Add(TokenConstants.EPSILON); return; } - else if (ctx.IsEmpty && addEOF) { + else if (ctx.IsEmpty && addEOF) + { look.Add(TokenConstants.EOF); - return; + return; } } if (s is RuleStopState) { - if (ctx == null) - { - look.Add(TokenConstants.EPSILON); - return; - } + if (ctx == null) + { + look.Add(TokenConstants.EPSILON); + return; + } else if (ctx.IsEmpty && addEOF) { look.Add(TokenConstants.EOF); return; } - if (ctx != PredictionContext.EMPTY) - { - for (int i = 0; i < ctx.Size; i++) - { - ATNState returnState = atn.states[ctx.GetReturnState(i)]; - bool removed = calledRuleStack.Get(returnState.ruleIndex); - try - { - calledRuleStack.Clear(returnState.ruleIndex); - Look(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - finally - { - if (removed) - { - calledRuleStack.Set(returnState.ruleIndex); - } - } - } - return; - } + if (ctx != PredictionContext.EMPTY) + { + bool removed = calledRuleStack.Get(s.ruleIndex); + try + { + calledRuleStack.Clear(s.ruleIndex); + for (int i = 0; i < ctx.Size; i++) + { + ATNState returnState = atn.states[ctx.GetReturnState(i)]; + Look_(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + } + } + finally + { + if (removed) + { + calledRuleStack.Set(s.ruleIndex); + } + } + return; + } } int n = s.NumberOfTransitions; for (int i_1 = 0; i_1 < n; i_1++) { Transition t = s.Transition(i_1); - if (t is RuleTransition) + if (t.GetType() == typeof(RuleTransition)) { RuleTransition ruleTransition = (RuleTransition)t; if 
(calledRuleStack.Get(ruleTransition.ruleIndex)) @@ -322,51 +208,42 @@ protected internal virtual void Look(ATNState s, ATNState stopState, PredictionC try { calledRuleStack.Set(ruleTransition.target.ruleIndex); - Look(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + Look_(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); } finally { calledRuleStack.Clear(ruleTransition.target.ruleIndex); } } - else + else if (t is AbstractPredicateTransition) { - if (t is AbstractPredicateTransition) + if (seeThruPreds) { - if (seeThruPreds) - { - Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - else - { - look.Add(HitPred); - } + Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); } else { - if (t.IsEpsilon) - { - Look(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); - } - else + look.Add(HitPred); + } + } + else if (t.IsEpsilon) + { + Look_(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF); + } + else if (t.GetType() == typeof(WildcardTransition)) + { + look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); + } + else + { + IntervalSet set = t.Label; + if (set != null) + { + if (t is NotSetTransition) { - if (t is WildcardTransition) - { - look.AddAll(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); - } - else - { - IntervalSet set = t.Label; - if (set != null) - { - if (t is NotSetTransition) - { - set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); - } - look.AddAll(set); - } - } + set = set.Complement(IntervalSet.Of(TokenConstants.MinUserTokenType, atn.maxTokenType)); } + look.AddAll(set); } } } diff --git a/runtime/CSharp/DefaultErrorStrategy.cs b/runtime/CSharp/DefaultErrorStrategy.cs index 5dee39cdde..f208c610ff 100644 --- a/runtime/CSharp/DefaultErrorStrategy.cs +++ 
b/runtime/CSharp/DefaultErrorStrategy.cs @@ -42,6 +42,21 @@ public class DefaultErrorStrategy : IAntlrErrorStrategy protected internal IntervalSet lastErrorStates; + /** + * This field is used to propagate information about the lookahead following + * the previous match. Since prediction prefers completing the current rule + * to error recovery efforts, error reporting may occur later than the + * original point where it was discoverable. The original context is used to + * compute the true expected sets as though the reporting occurred as early + * as possible. + */ + protected ParserRuleContext nextTokensContext; + + /** + * @see #nextTokensContext + */ + protected int nextTokensState; + /// /// ///

The default implementation simply calls @@ -264,8 +279,22 @@ public virtual void Sync(Parser recognizer) int la = tokens.LA(1); // try cheaper subset first; might get lucky. seems to shave a wee bit off var nextTokens = recognizer.Atn.NextTokens(s); - if (nextTokens.Contains(TokenConstants.EPSILON) || nextTokens.Contains(la)) + if (nextTokens.Contains(la)) + { + nextTokensContext = null; + nextTokensState = ATNState.InvalidStateNumber; + return; + } + + if (nextTokens.Contains(TokenConstants.EPSILON)) { + if (nextTokensContext == null) + { + // It's possible the next token won't match; information tracked + // by sync is restricted for performance. + nextTokensContext = recognizer.Context; + nextTokensState = recognizer.State; + } return; } switch (s.StateType) diff --git a/runtime/Cpp/runtime/src/atn/LL1Analyzer.cpp b/runtime/Cpp/runtime/src/atn/LL1Analyzer.cpp index d7949cd1ed..ddca800889 100755 --- a/runtime/Cpp/runtime/src/atn/LL1Analyzer.cpp +++ b/runtime/Cpp/runtime/src/atn/LL1Analyzer.cpp @@ -100,18 +100,16 @@ void LL1Analyzer::_LOOK(ATNState *s, ATNState *stopState, Ref } if (ctx != PredictionContext::EMPTY) { - // run thru all possible stack tops in ctx + bool removed = calledRuleStack.test(s->ruleIndex); + calledRuleStack[s->ruleIndex] = false; + auto onExit = finally([removed, &calledRuleStack, s] { + if (removed) { + calledRuleStack.set(s->ruleIndex); + } + }); + // run thru all possible stack tops in ctx for (size_t i = 0; i < ctx->size(); i++) { ATNState *returnState = _atn.states[ctx->getReturnState(i)]; - - bool removed = calledRuleStack.test(returnState->ruleIndex); - auto onExit = finally([removed, &calledRuleStack, returnState] { - if (removed) { - calledRuleStack.set(returnState->ruleIndex); - } - }); - - calledRuleStack[returnState->ruleIndex] = false; _LOOK(returnState, stopState, ctx->getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF); } return; diff --git a/runtime/Go/antlr/ll1_analyzer.go b/runtime/Go/antlr/ll1_analyzer.go 
index f5afd09b39..3ebc40a76b 100644 --- a/runtime/Go/antlr/ll1_analyzer.go +++ b/runtime/Go/antlr/ll1_analyzer.go @@ -112,16 +112,6 @@ func (la *LL1Analyzer) Look(s, stopState ATNState, ctx RuleContext) *IntervalSet func (la *LL1Analyzer) look2(s, stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *Set, calledRuleStack *BitSet, seeThruPreds, addEOF bool, i int) { returnState := la.atn.states[ctx.getReturnState(i)] - - removed := calledRuleStack.contains(returnState.GetRuleIndex()) - - defer func() { - if removed { - calledRuleStack.add(returnState.GetRuleIndex()) - } - }() - - calledRuleStack.remove(returnState.GetRuleIndex()) la.look1(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) } @@ -158,6 +148,13 @@ func (la *LL1Analyzer) look1(s, stopState ATNState, ctx PredictionContext, look } if ctx != BasePredictionContextEMPTY { + removed := calledRuleStack.contains(s.GetRuleIndex()) + defer func() { + if removed { + calledRuleStack.add(s.GetRuleIndex()) + } + }() + calledRuleStack.remove(s.GetRuleIndex()) // run thru all possible stack tops in ctx for i := 0; i < ctx.length(); i++ { returnState := la.atn.states[ctx.getReturnState(i)] diff --git a/runtime/JavaScript/src/antlr4/LL1Analyzer.js b/runtime/JavaScript/src/antlr4/LL1Analyzer.js index e76238dd78..18ed878996 100644 --- a/runtime/JavaScript/src/antlr4/LL1Analyzer.js +++ b/runtime/JavaScript/src/antlr4/LL1Analyzer.js @@ -128,17 +128,17 @@ class LL1Analyzer { return; } if (ctx !== PredictionContext.EMPTY) { - // run thru all possible stack tops in ctx - for(let i=0; iThe default implementation simply calls {@link #endErrorCondition} to # ensure that the handler is not in error recovery mode.

@@ -203,7 +205,16 @@ def sync(self, recognizer): la = recognizer.getTokenStream().LA(1) # try cheaper subset first; might get lucky. seems to shave a wee bit off nextTokens = recognizer.atn.nextTokens(s) - if Token.EPSILON in nextTokens or la in nextTokens: + if la in nextTokens: + self.nextTokensContext = None + self.nextTokenState = ATNState.INVALID_STATE_NUMBER + return + elif Token.EPSILON in nextTokens: + if self.nextTokensContext is None: + # It's possible the next token won't match information tracked + # by sync is restricted for performance. + self.nextTokensContext = recognizer._ctx + self.nextTokensState = recognizer._stateNumber return if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START, diff --git a/runtime/Python3/src/antlr4/LL1Analyzer.py b/runtime/Python3/src/antlr4/LL1Analyzer.py index a200a5d0e7..ac149fa2f1 100644 --- a/runtime/Python3/src/antlr4/LL1Analyzer.py +++ b/runtime/Python3/src/antlr4/LL1Analyzer.py @@ -132,16 +132,16 @@ def _LOOK(self, s:ATNState, stopState:ATNState , ctx:PredictionContext, look:Int return if ctx != PredictionContext.EMPTY: - # run thru all possible stack tops in ctx - for i in range(0, len(ctx)): - returnState = self.atn.states[ctx.getReturnState(i)] - removed = returnState.ruleIndex in calledRuleStack - try: - calledRuleStack.discard(returnState.ruleIndex) + removed = s.ruleIndex in calledRuleStack + try: + calledRuleStack.discard(s.ruleIndex) + # run thru all possible stack tops in ctx + for i in range(0, len(ctx)): + returnState = self.atn.states[ctx.getReturnState(i)] self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) - finally: - if removed: - calledRuleStack.add(returnState.ruleIndex) + finally: + if removed: + calledRuleStack.add(s.ruleIndex) return for t in s.transitions: diff --git a/runtime/Python3/src/antlr4/error/ErrorStrategy.py b/runtime/Python3/src/antlr4/error/ErrorStrategy.py index d7538b6075..0f7caadb24 100644 --- 
a/runtime/Python3/src/antlr4/error/ErrorStrategy.py +++ b/runtime/Python3/src/antlr4/error/ErrorStrategy.py @@ -58,6 +58,8 @@ def __init__(self): # self.lastErrorIndex = -1 self.lastErrorStates = None + self.nextTokensContext = None + self.nextTokenState = 0 #

The default implementation simply calls {@link #endErrorCondition} to # ensure that the handler is not in error recovery mode.

@@ -208,7 +210,16 @@ def sync(self, recognizer:Parser): la = recognizer.getTokenStream().LA(1) # try cheaper subset first; might get lucky. seems to shave a wee bit off nextTokens = recognizer.atn.nextTokens(s) - if Token.EPSILON in nextTokens or la in nextTokens: + if la in nextTokens: + self.nextTokensContext = None + self.nextTokenState = ATNState.INVALID_STATE_NUMBER + return + elif Token.EPSILON in nextTokens: + if self.nextTokensContext is None: + # It's possible the next token won't match information tracked + # by sync is restricted for performance. + self.nextTokensContext = recognizer._ctx + self.nextTokensState = recognizer._stateNumber return if s.stateType in [ATNState.BLOCK_START, ATNState.STAR_BLOCK_START, diff --git a/runtime/Swift/Sources/Antlr4/atn/LL1Analyzer.swift b/runtime/Swift/Sources/Antlr4/atn/LL1Analyzer.swift index 3f594f2347..bc179a1d0d 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LL1Analyzer.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LL1Analyzer.swift @@ -169,16 +169,18 @@ public class LL1Analyzer { } if ctx != PredictionContext.EMPTY { + let removed = try! calledRuleStack.get(s.ruleIndex!) + try! calledRuleStack.clear(s.ruleIndex!) + defer { + if removed { + try! calledRuleStack.set(s.ruleIndex!) + } + } // run thru all possible stack tops in ctx let length = ctx.size() for i in 0..