/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
using System;
using System.Collections.Generic;
using Antlr4.Runtime.Dfa;
using Antlr4.Runtime.Misc;
using Antlr4.Runtime.Sharpen;
namespace Antlr4.Runtime.Atn
{
/**
* The embodiment of the adaptive LL(*) parsing strategy, ALL(*).
*
* <p>
* The basic complexity of the adaptive strategy makes it harder to understand.
* We begin with ATN simulation to build paths in a DFA. Subsequent prediction
* requests go through the DFA first. If they reach a state without an edge for
* the current symbol, the algorithm fails over to the ATN simulation to
* complete the DFA path for the current input (until it finds a conflict state
* or uniquely predicting state).</p>
*
* <p>
* All of that is done without using the outer context because we want to create
* a DFA that is not dependent upon the rule invocation stack when we do a
* prediction. One DFA works in all contexts. We avoid using context not
* necessarily because it's slower, although it can be, but because of the DFA
* caching problem. The closure routine only considers the rule invocation stack
* created during prediction beginning in the decision rule. For example, if
* prediction occurs without invoking another rule's ATN, there are no context
* stacks in the configurations. When lack of context leads to a conflict, we
* don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
* strategy (versus full LL(*)).</p>
*
* <p>
* When SLL yields a configuration set with conflict, we rewind the input and
* retry the ATN simulation, this time using full outer context without adding
* to the DFA. Configuration context stacks will be the full invocation stacks
* from the start rule. If we get a conflict using full context, then we can
* definitively say we have a true ambiguity for that input sequence. If we
* don't get a conflict, it implies that the decision is sensitive to the outer
* context. (It is not context-sensitive in the sense of context-sensitive
* grammars.)</p>
*
* <p>
* The next time we reach this DFA state with an SLL conflict, through DFA
* simulation, we will again retry the ATN simulation using full context mode.
* This is slow because we can't save the results and have to "interpret" the
* ATN each time we get that input.</p>
*
* <p>
* <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
*
* <p>
* We could easily cache results from full context to predicted alternative, and
* that saves a lot of time, but it doesn't work in the presence of predicates. The set
* of visible predicates from the ATN start state changes depending on the
* context, because closure can fall off the end of a rule. I tried to cache
* tuples (stack context, semantic context, predicted alt) but it was slower
* than interpreting and much more complicated. Also required a huge amount of
* memory. The goal is not to create the world's fastest parser anyway. I'd like
* to keep this algorithm simple. By launching multiple threads, we can improve
* the speed of parsing across a large number of files.</p>
*
* <p>
* There is no strict ordering between the amount of input used by SLL vs LL,
* which makes it really hard to build a cache for full context. Let's say that
* we have input A B C that leads to an SLL conflict with full context X. That
* implies that using X we might only use A B but we could also use A B C D to
* resolve conflict. Input A B C D could predict alternative 1 in one position
* in the input and A B C E could predict alternative 2 in another position in
* input. The conflicting SLL configurations could still be non-unique in the
* full context prediction, which would lead us to requiring more input than the
* original A B C. To make a prediction cache work, we have to track the exact
* input used during the previous prediction. That amounts to a cache that maps
* X to a specific DFA for that context.</p>
*
* <p>
* Something should be done for left-recursive expression predictions. They are
* likely LL(1) + pred eval. It is easier to do the whole "SLL unless error, then retry
* with full LL" approach that Sam does.</p>
*
* <p>
* <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
*
* <p>
* We avoid doing full context retry when the outer context is empty, we did not
* dip into the outer context by falling off the end of the decision state rule,
* or when we force SLL mode.</p>
*
* <p>
* As an example of the not-dip-into-outer-context case, consider super
* constructor calls versus function calls. One grammar might look like
* this:</p>
*
* <pre>
* ctorBody
* : '{' superCall? stat* '}'
* ;
* </pre>
*
* <p>
* Or, you might see something like</p>
*
* <pre>
* stat
* : superCall ';'
* | expression ';'
* | ...
* ;
* </pre>
*
* <p>
* In both cases I believe that no closure operations will dip into the outer
* context. In the first case, ctorBody will at worst stop at the '}'.
* In the second case it should stop at the ';'. Both cases should stay within the
* entry rule and not dip into the outer context.</p>
*
* <p>
* <strong>PREDICATES</strong></p>
*
* <p>
* Predicates are always evaluated if present, in both SLL and LL. SLL and
* LL simulation deal with predicates differently, however. SLL collects predicates as
* it performs closure operations, like ANTLR v3 did. It delays predicate
* evaluation until it reaches an accept state. This allows us to cache the SLL
* ATN simulation whereas, if we had evaluated predicates on-the-fly during
* closure, the DFA state configuration sets would be different and we couldn't
* build up a suitable DFA.</p>
*
* <p>
* When building a DFA accept state during ATN simulation, we evaluate any
* predicates and return the sole semantically valid alternative. If there is
* more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
* we throw an exception. Alternatives without predicates act like they have
* true predicates. The simple way to think about it is to strip away all
* alternatives with false predicates and choose the minimum alternative that
* remains.</p>
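*
* <p>
* For example, suppose an accept state has alternatives 1, 2, and 3, where
* alternative 1's predicate evaluates to false and the other two have no
* predicates (implicitly true). Alternative 1 is stripped and the decision
* resolves to alternative 2, the minimum of the surviving set {2, 3}.</p>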
*
* <p>
* When we start in the DFA and reach an accept state that's predicated, we test
* those predicates and return the minimum semantically viable alternative. If no
* alternatives are viable, we throw an exception.</p>
*
* <p>
* During full LL ATN simulation, closure always evaluates predicates
* on-the-fly. This is crucial to reducing the configuration set size during
* closure. Without this on-the-fly evaluation, it hits a landmine when parsing
* the Java grammar, for example.</p>
*
* <p>
* <strong>SHARING DFA</strong></p>
*
* <p>
* All instances of the same parser share the same decision DFAs through a
* static field. Each instance gets its own ATN simulator but they share the
* same {@link #decisionToDFA} field. They also share a
* {@link PredictionContextCache} object that makes sure that all
* {@link PredictionContext} objects are shared among the DFA states. This makes
* a big size difference.</p>
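*
* <p>
* As a rough sketch only (not the exact generated code; type and member names
* here are assumptions for illustration), a generated parser shares one static
* DFA array and one context cache across all of its instances and hands both
* to this simulator:</p>
*
* <pre>
* private static readonly DFA[] decisionToDFA;
* private static readonly PredictionContextCache sharedContextCache =
*     new PredictionContextCache();
*
* static MyParser() {
*     decisionToDFA = new DFA[_ATN.NumberOfDecisions];
*     for (int i = 0; i < _ATN.NumberOfDecisions; i++) {
*         decisionToDFA[i] = new DFA(_ATN.GetDecisionState(i), i);
*     }
* }
*
* public MyParser(ITokenStream input) : base(input) {
*     Interpreter = new ParserATNSimulator(this, _ATN, decisionToDFA, sharedContextCache);
* }
* </pre>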
*
* <p>
* <strong>THREAD SAFETY</strong></p>
*
* <p>
* The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
* it adds a new DFA object to that array. {@link #addDFAEdge}
* locks on the DFA for the current decision when setting the
* {@link DFAState#edges} field. {@link #addDFAState} locks on
* the DFA for the current decision when looking up a DFA state to see if it
* already exists. We must make sure that all requests to add DFA states that
* are equivalent result in the same shared DFA object. This is because lots of
* threads will be trying to update the DFA at once. The
* {@link #addDFAState} method also locks inside the DFA lock
* but this time on the shared context cache when it rebuilds the
* configurations' {@link PredictionContext} objects using cached
* subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
* safe as long as we can guarantee that all threads referencing
* {@code s.edge[t]} get the same physical target {@link DFAState}, or
* {@code null}. Once into the DFA, the DFA simulation does not reference the
* {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
* targets. The DFA simulator will either find {@link DFAState#edges} to be
* {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
* {@code dfa.edges[t]} to be non-null. The
* {@link #addDFAEdge} method could be racing to set the field,
* but in either case the DFA simulator works; if it sees {@code null}, it simply requests ATN
* simulation. It could also race trying to get {@code dfa.edges[t]}, but either
* way it will work because it's not doing a test-and-set operation.</p>
*
* <p>
* <strong>Starting with SLL then failing over to combined SLL/LL (Two-Stage
* Parsing)</strong></p>
*
* <p>
* Sam pointed out that if SLL does not give a syntax error, then there is no
* point in doing full LL, which is slower. We only have to try LL if we get a
* syntax error. For maximum speed, Sam starts the parser set to pure SLL
* mode with the {@link BailErrorStrategy}:</p>
*
* <pre>
* parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
* parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
* </pre>
*
* <p>
* If it does not get a syntax error, then we're done. If it does get a syntax
* error, we need to retry with the combined SLL/LL strategy.</p>
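*
* <p>
* A minimal sketch of that two-stage fallback in application code (names like
* startRule, tokens, and tree are placeholders, and exact runtime member names
* may vary by target/version) might look like:</p>
*
* <pre>
* parser.Interpreter.PredictionMode = PredictionMode.SLL;
* parser.ErrorHandler = new BailErrorStrategy();
* try {
*     tree = parser.startRule();          // stage 1: pure SLL, bail on first error
* }
* catch (ParseCanceledException) {
*     tokens.Seek(0);                     // rewind the token stream
*     parser.Reset();
*     parser.ErrorHandler = new DefaultErrorStrategy();
*     parser.Interpreter.PredictionMode = PredictionMode.LL;
*     tree = parser.startRule();          // stage 2: full SLL/LL
* }
* </pre>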
*
* <p>
* The reason this works is as follows. If there are no SLL conflicts, then the
* grammar is SLL (at least for that input set). If there is an SLL conflict,
* the full LL analysis must yield a set of viable alternatives which is a
* subset of the alternatives reported by SLL. If the LL set is a singleton,
* then the grammar is LL but not SLL. If the LL set is the same size as the SLL
* set, the decision is SLL. If the LL set has size > 1, then that decision
* is truly ambiguous on the current input. If the LL set is smaller, then the
* SLL conflict resolution might choose an alternative that the full LL would
* rule out as a possibility based upon better context information. If that's
* the case, then the SLL parse will definitely get an error because the full LL
* analysis says it's not viable. If SLL conflict resolution chooses an
* alternative within the LL set, then both SLL and LL would choose the same
* alternative because they both choose the minimum of multiple conflicting
* alternatives.</p>
*
* <p>
* Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
* a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
* parsing will get an error because SLL will pursue alternative 1. If
* <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
* choose the same alternative because alternative one is the minimum of either
* set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
* error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
*
* <p>
* Of course, if the input is invalid, then we will get an error for sure in
* both SLL and LL parsing. Erroneous input will therefore require 2 passes over
* the input.</p>
*/
public class ParserATNSimulator : ATNSimulator
{
public static readonly bool debug = false;
public static readonly bool debug_list_atn_decisions = false;
public static readonly bool dfa_debug = false;
public static readonly bool retry_debug = false;
protected readonly Parser parser;
public readonly DFA[] decisionToDFA;
/** SLL, LL, or LL + exact ambig detection? */
private PredictionMode mode = PredictionMode.LL;
/** Each prediction operation uses a cache for merge of prediction contexts.
* Don't keep around as it wastes huge amounts of memory. DoubleKeyMap
* isn't synchronized but we're ok since two threads shouldn't reuse same
* parser/atnsim object because it can only handle one input at a time.
* This maps graphs a and b to merged result c. (a,b)→c. We can avoid
* the merge if we ever see a and b again. Note that (b,a)→c should
* also be examined during cache lookup.
*/
protected MergeCache mergeCache;
// LAME globals to avoid parameters!!!!! I need these down deep in predTransition
protected ITokenStream input;
protected int startIndex;
protected ParserRuleContext context;
protected DFA thisDfa;
/** Testing only! */
public ParserATNSimulator(ATN atn, DFA[] decisionToDFA,
PredictionContextCache sharedContextCache)
: this(null, atn, decisionToDFA, sharedContextCache)
{ }
public ParserATNSimulator(Parser parser, ATN atn,
DFA[] decisionToDFA,
PredictionContextCache sharedContextCache)
: base(atn, sharedContextCache)
{
this.parser = parser;
this.decisionToDFA = decisionToDFA;
// DOTGenerator dot = new DOTGenerator(null);
// Console.WriteLine(dot.getDOT(atn.rules.get(0), parser.getRuleNames()));
// Console.WriteLine(dot.getDOT(atn.rules.get(1), parser.getRuleNames()));
}
public override void Reset()
{
}
public override void ClearDFA()
{
for (int d = 0; d < decisionToDFA.Length; d++)
{
decisionToDFA[d] = new DFA(atn.GetDecisionState(d), d);
}
}
public virtual int AdaptivePredict(ITokenStream input, int decision,
ParserRuleContext outerContext)
{
if (debug || debug_list_atn_decisions)
{
Console.WriteLine("adaptivePredict decision " + decision +
" exec LA(1)==" + GetLookaheadName(input) +
" line " + input.LT(1).Line + ":" + input.LT(1).Column);
}
this.input = input;
startIndex = input.Index;
context = outerContext;
DFA dfa = decisionToDFA[decision];
thisDfa = dfa;
int m = input.Mark();
int index = startIndex;
// Now we are certain to have a specific decision's DFA
// But, do we still need an initial state?
try
{
DFAState s0;
if (dfa.IsPrecedenceDfa)
{
// the start state for a precedence DFA depends on the current
// parser precedence, and is provided by a DFA method.
s0 = dfa.GetPrecedenceStartState(parser.Precedence);
}
else {
// the start state for a "regular" DFA is just s0
s0 = dfa.s0;
}
if (s0 == null)
{
if (outerContext == null) outerContext = ParserRuleContext.EmptyContext;
if (debug || debug_list_atn_decisions)
{
Console.WriteLine("predictATN decision " + dfa.decision +
" exec LA(1)==" + GetLookaheadName(input) +
", outerContext=" + outerContext.ToString(parser));
}
bool fullCtx = false;
ATNConfigSet s0_closure =
ComputeStartState(dfa.atnStartState,
ParserRuleContext.EmptyContext,
fullCtx);
if (dfa.IsPrecedenceDfa)
{
/* If this is a precedence DFA, we use applyPrecedenceFilter
* to convert the computed start state to a precedence start
* state. We then use DFA.setPrecedenceStartState to set the
* appropriate start state for the precedence level rather
* than simply setting DFA.s0.
*/
dfa.s0.configSet = s0_closure; // not used for prediction but useful to know start configs anyway
s0_closure = ApplyPrecedenceFilter(s0_closure);
s0 = AddDFAState(dfa, new DFAState(s0_closure));
dfa.SetPrecedenceStartState(parser.Precedence, s0);
}
else {
s0 = AddDFAState(dfa, new DFAState(s0_closure));
dfa.s0 = s0;
}
}
int alt = ExecATN(dfa, s0, input, index, outerContext);
if (debug)
Console.WriteLine("DFA after predictATN: " + dfa.ToString(parser.Vocabulary));
return alt;
}
finally
{
mergeCache = null; // wack cache after each prediction
thisDfa = null;
input.Seek(index);
input.Release(m);
}
}
/** Performs ATN simulation to compute a predicted alternative based
* upon the remaining input, but also updates the DFA cache to avoid
* having to traverse the ATN again for the same input sequence.
There are some key conditions we're looking for after computing a new
set of ATN configs (proposed DFA state):
* if the set is empty, there is no viable alternative for current symbol
* does the state uniquely predict an alternative?
* does the state have a conflict that would prevent us from
putting it on the work list?
We also have some key operations to do:
* add an edge from previous DFA state to potentially new DFA state, D,
upon current symbol but only if adding to work list, which means in all
cases except no viable alternative (and possibly non-greedy decisions?)
* collecting predicates and adding semantic context to DFA accept states
* adding rule context to context-sensitive DFA accept states
* consuming an input symbol
* reporting a conflict
* reporting an ambiguity
* reporting a context sensitivity
* reporting insufficient predicates
cover these cases:
dead end
single alt
single alt + preds
conflict
conflict + preds
*/
protected int ExecATN(DFA dfa, DFAState s0,
ITokenStream input, int startIndex,
ParserRuleContext outerContext)
{
if (debug || debug_list_atn_decisions)
{
Console.WriteLine("execATN decision " + dfa.decision +
" exec LA(1)==" + GetLookaheadName(input) +
" line " + input.LT(1).Line + ":" + input.LT(1).Column);
}
DFAState previousD = s0;
if (debug) Console.WriteLine("s0 = " + s0);
int t = input.LA(1);
while (true)
{ // while more work
DFAState D = GetExistingTargetState(previousD, t);
if (D == null)
{
D = ComputeTargetState(dfa, previousD, t);
}
if (D == ERROR)
{
// if any configs in previous dipped into outer context, that
// means that input up to t actually finished entry rule
// at least for SLL decision. Full LL doesn't dip into outer
// so don't need special case.
// We will get an error no matter what so delay until after
// decision; better error message. Also, no reachable target
// ATN states in SLL implies LL will also get nowhere.
// If conflict in states that dip out, choose min since we
// will get error no matter what.
NoViableAltException e = NoViableAlt(input, outerContext, previousD.configSet, startIndex);
input.Seek(startIndex);
int alt = GetSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configSet, outerContext);
if (alt != ATN.INVALID_ALT_NUMBER)
{
return alt;
}
throw e;
}
if (D.requiresFullContext && mode != PredictionMode.SLL)
{
// IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
BitSet conflictingAlts = D.configSet.conflictingAlts;
if (D.predicates != null)
{
if (debug) Console.WriteLine("DFA state has preds in DFA sim LL failover");
int conflictIndex = input.Index;
if (conflictIndex != startIndex)
{
input.Seek(startIndex);
}
conflictingAlts = EvalSemanticContext(D.predicates, outerContext, true);
if (conflictingAlts.Cardinality() == 1)
{
if (debug) Console.WriteLine("Full LL avoided");
return conflictingAlts.NextSetBit(0);
}
if (conflictIndex != startIndex)
{
// restore the index so reporting the fallback to full
// context occurs with the index at the correct spot
input.Seek(conflictIndex);
}
}
if (dfa_debug) Console.WriteLine("ctx sensitive state " + outerContext + " in " + D);
bool fullCtx = true;
ATNConfigSet s0_closure =
ComputeStartState(dfa.atnStartState, outerContext, fullCtx);
ReportAttemptingFullContext(dfa, conflictingAlts, D.configSet, startIndex, input.Index);
int alt = ExecATNWithFullContext(dfa, D, s0_closure,
input, startIndex,
outerContext);
return alt;
}
if (D.isAcceptState)
{
if (D.predicates == null)
{
return D.prediction;
}
int stopIndex = input.Index;
input.Seek(startIndex);
BitSet alts = EvalSemanticContext(D.predicates, outerContext, true);
switch (alts.Cardinality())
{
case 0:
throw NoViableAlt(input, outerContext, D.configSet, startIndex);
case 1:
return alts.NextSetBit(0);
default:
// report ambiguity after predicate evaluation to make sure the correct
// set of ambig alts is reported.
ReportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configSet);
return alts.NextSetBit(0);
}
}
previousD = D;
if (t != IntStreamConstants.EOF)
{
input.Consume();
t = input.LA(1);
}
}
}
/**
* Get an existing target state for an edge in the DFA. If the target state
* for the edge has not yet been computed or is otherwise not available,
* this method returns {@code null}.
*
* @param previousD The current DFA state
* @param t The next input symbol
* @return The existing target DFA state for the given input symbol
* {@code t}, or {@code null} if the target state for this edge is not
* already cached
*/
protected virtual DFAState GetExistingTargetState(DFAState previousD, int t)
{
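// The edges array is indexed by token type shifted up by one so that EOF (-1)
// maps to slot 0; hence the t + 1 offsets below.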
DFAState[] edges = previousD.edges;
if (edges == null || t + 1 < 0 || t + 1 >= edges.Length)
{
return null;
}
return edges[t + 1];
}
/**
* Compute a target state for an edge in the DFA, and attempt to add the
* computed state and corresponding edge to the DFA.
*
* @param dfa The DFA
* @param previousD The current DFA state
* @param t The next input symbol
*
* @return The computed target DFA state for the given input symbol
* {@code t}. If {@code t} does not lead to a valid DFA state, this method
* returns {@link #ERROR}.
*/
protected virtual DFAState ComputeTargetState(DFA dfa, DFAState previousD, int t)
{
ATNConfigSet reach = ComputeReachSet(previousD.configSet, t, false);
if (reach == null)
{
AddDFAEdge(dfa, previousD, t, ERROR);
return ERROR;
}
// create new target state; we'll add to DFA after it's complete
DFAState D = new DFAState(reach);
int predictedAlt = GetUniqueAlt(reach);
if (debug)
{
ICollection<BitSet> altSubSets = PredictionMode.GetConflictingAltSubsets(reach.configs);
Console.WriteLine("SLL altSubSets=" + StaticUtils.ToString(altSubSets) +
", configs=" + reach +
", predict=" + predictedAlt + ", allSubsetsConflict=" +
PredictionMode.AllSubsetsConflict(altSubSets) + ", conflictingAlts=" +
GetConflictingAlts(reach));
}
if (predictedAlt != ATN.INVALID_ALT_NUMBER)
{
// NO CONFLICT, UNIQUELY PREDICTED ALT
D.isAcceptState = true;
D.configSet.uniqueAlt = predictedAlt;
D.prediction = predictedAlt;
}
else if (PredictionMode.HasSLLConflictTerminatingPrediction(mode, reach))
{
// MORE THAN ONE VIABLE ALTERNATIVE
D.configSet.conflictingAlts = GetConflictingAlts(reach);
D.requiresFullContext = true;
// in SLL-only mode, we will stop at this state and return the minimum alt
D.isAcceptState = true;
D.prediction = D.configSet.conflictingAlts.NextSetBit(0);
}
if (D.isAcceptState && D.configSet.hasSemanticContext)
{
PredicateDFAState(D, atn.GetDecisionState(dfa.decision));
if (D.predicates != null)
{
D.prediction = ATN.INVALID_ALT_NUMBER;
}
}
// all adds to dfa are done after we've created full D state
D = AddDFAEdge(dfa, previousD, t, D);
return D;
}
protected void PredicateDFAState(DFAState dfaState, DecisionState decisionState)
{
// We need to test all predicates, even in DFA states that
// uniquely predict alternative.
int nalts = decisionState.NumberOfTransitions;
// Update DFA so reach becomes accept state with (predicate,alt)
// pairs if preds found for conflicting alts
BitSet altsToCollectPredsFrom = GetConflictingAltsOrUniqueAlt(dfaState.configSet);
SemanticContext[] altToPred = GetPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configSet, nalts);
if (altToPred != null)
{
dfaState.predicates = GetPredicatePredictions(altsToCollectPredsFrom, altToPred);
dfaState.prediction = ATN.INVALID_ALT_NUMBER; // make sure we use preds
}
else {
// There are preds in configs but they might go away
// when OR'd together like {p}? || NONE == NONE. If neither
// alt has preds, resolve to min alt
dfaState.prediction = altsToCollectPredsFrom.NextSetBit(0);
}
}
// comes back with reach.UniqueAlt set to a valid alt
protected int ExecATNWithFullContext(DFA dfa,
DFAState D, // how far we got in SLL DFA before failing over
ATNConfigSet s0,
ITokenStream input, int startIndex,
ParserRuleContext outerContext)
{
if (debug || debug_list_atn_decisions)
{
Console.WriteLine("execATNWithFullContext " + s0);
}
bool fullCtx = true;
bool foundExactAmbig = false;
ATNConfigSet reach = null;
ATNConfigSet previous = s0;
input.Seek(startIndex);
int t = input.LA(1);
int predictedAlt;
while (true)
{ // while more work
// Console.WriteLine("LL REACH "+GetLookaheadName(input)+
// " from configs.size="+previous.size()+
// " line "+input.LT(1)Line+":"+input.LT(1).Column);
reach = ComputeReachSet(previous, t, fullCtx);
if (reach == null)
{
// if any configs in previous dipped into outer context, that
// means that input up to t actually finished entry rule
// at least for LL decision. Full LL doesn't dip into outer
// so don't need special case.
// We will get an error no matter what so delay until after
// decision; better error message. Also, no reachable target
// ATN states in SLL implies LL will also get nowhere.
// If conflict in states that dip out, choose min since we
// will get error no matter what.
NoViableAltException e = NoViableAlt(input, outerContext, previous, startIndex);
input.Seek(startIndex);
int alt = GetSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext);
if (alt != ATN.INVALID_ALT_NUMBER)
{
return alt;
}
throw e;
}
ICollection<BitSet> altSubSets = PredictionMode.GetConflictingAltSubsets(reach.configs);
if (debug)
{
Console.WriteLine("LL altSubSets=" + altSubSets +
", predict=" + PredictionMode.GetUniqueAlt(altSubSets) +
", ResolvesToJustOneViableAlt=" +
PredictionMode.ResolvesToJustOneViableAlt(altSubSets));
}
// Console.WriteLine("altSubSets: "+altSubSets);
// System.err.println("reach="+reach+", "+reach.conflictingAlts);
reach.uniqueAlt = GetUniqueAlt(reach);
// unique prediction?
if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER)
{
predictedAlt = reach.uniqueAlt;
break;
}
if (mode != PredictionMode.LL_EXACT_AMBIG_DETECTION)
{
predictedAlt = PredictionMode.ResolvesToJustOneViableAlt(altSubSets);
if (predictedAlt != ATN.INVALID_ALT_NUMBER)
{
break;
}
}
else {
// In exact ambiguity mode, we never try to terminate early.
// Just keeps scarfing until we know what the conflict is
if (PredictionMode.AllSubsetsConflict(altSubSets) &&
PredictionMode.AllSubsetsEqual(altSubSets))
{
foundExactAmbig = true;
predictedAlt = PredictionMode.GetSingleViableAlt(altSubSets);
break;
}
// else there are multiple non-conflicting subsets or
// we're not sure what the ambiguity is yet.
// So, keep going.
}
previous = reach;
if (t != IntStreamConstants.EOF)
{
input.Consume();
t = input.LA(1);
}
}
// If the configuration set uniquely predicts an alternative,
// without conflict, then we know that it's a full LL decision
// not SLL.
if (reach.uniqueAlt != ATN.INVALID_ALT_NUMBER)
{
ReportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.Index);
return predictedAlt;
}
// We do not check predicates here because we have checked them
// on-the-fly when doing full context prediction.
/*
In non-exact ambiguity detection mode, we might actually be able to
detect an exact ambiguity, but I'm not going to spend the cycles
needed to check. We only emit ambiguity warnings in exact ambiguity
mode.
For example, we might know that we have conflicting configurations.
But, that does not mean that there is no way forward without a
conflict. It's possible to have nonconflicting alt subsets as in:
LL altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}]
from
[(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]),
(13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])]
In this case, (17,1,[5 $]) indicates there is some next sequence that
would resolve this without conflict to alternative 1. Any other viable
next sequence, however, is associated with a conflict. We stop
looking for input because no amount of further lookahead will alter
the fact that we should predict alternative 1. We just can't say for
sure that there is an ambiguity without looking further.
*/
ReportAmbiguity(dfa, D, startIndex, input.Index, foundExactAmbig, reach.GetAlts(), reach);
return predictedAlt;
}
protected virtual ATNConfigSet ComputeReachSet(ATNConfigSet closure, int t, bool fullCtx)
{
if (debug)
Console.WriteLine("in computeReachSet, starting closure: " + closure);
if (mergeCache == null)
{
mergeCache = new MergeCache();
}
ATNConfigSet intermediate = new ATNConfigSet(fullCtx);
/* Configurations already in a rule stop state indicate reaching the end
* of the decision rule (local context) or end of the start rule (full
* context). Once reached, these configurations are never updated by a
* closure operation, so they are handled separately for the performance
* advantage of having a smaller intermediate set when calling closure.
*
* For full-context reach operations, separate handling is required to
* ensure that the alternative matching the longest overall sequence is
* chosen when multiple such configurations can match the input.
*/
List<ATNConfig> skippedStopStates = null;
// First figure out where we can reach on input t
foreach (ATNConfig c in closure.configs)
{
if (debug) Console.WriteLine("testing " + GetTokenName(t) + " at " + c.ToString());
if (c.state is RuleStopState)
{
if (fullCtx || t == IntStreamConstants.EOF)
{
if (skippedStopStates == null)
{
skippedStopStates = new List<ATNConfig>();
}
skippedStopStates.Add(c);
}
continue;
}
int n = c.state.NumberOfTransitions;
for (int ti = 0; ti < n; ti++)
{ // for each transition
Transition trans = c.state.Transition(ti);
ATNState target = GetReachableTarget(trans, t);
if (target != null)
{
intermediate.Add(new ATNConfig(c, target), mergeCache);
}
}
}
// Now figure out where the reach operation can take us...
ATNConfigSet reach = null;
/* This block optimizes the reach operation for intermediate sets which
* trivially indicate a termination state for the overall
* adaptivePredict operation.
*
* The conditions assume that intermediate
* contains all configurations relevant to the reach set, but this
* condition is not true when one or more configurations have been
* withheld in skippedStopStates, or when the current symbol is EOF.
*/
if (skippedStopStates == null && t != TokenConstants.EOF)
{
if (intermediate.Count == 1)
{
// Don't pursue the closure if there is just one state.
// It can only have one alternative; just add to result
// Also don't pursue the closure if there is unique alternative
// among the configurations.
reach = intermediate;
}
else if (GetUniqueAlt(intermediate) != ATN.INVALID_ALT_NUMBER)
{
// Also don't pursue the closure if there is unique alternative
// among the configurations.
reach = intermediate;
}
}
/* If the reach set could not be trivially determined, perform a closure
* operation on the intermediate set to compute its initial value.
*/
if (reach == null)
{
reach = new ATNConfigSet(fullCtx);
HashSet<ATNConfig> closureBusy = new HashSet<ATNConfig>();
bool treatEofAsEpsilon = t == TokenConstants.EOF;
foreach (ATNConfig c in intermediate.configs)
{
Closure(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon);
}
}
if (t == IntStreamConstants.EOF)
{
/* After consuming EOF no additional input is possible, so we are
* only interested in configurations which reached the end of the
* decision rule (local context) or end of the start rule (full
* context). Update reach to contain only these configurations. This
* handles both explicit EOF transitions in the grammar and implicit
* EOF transitions following the end of the decision or start rule.
*
* When reach==intermediate, no closure operation was performed. In
* this case, removeAllConfigsNotInRuleStopState needs to check for
* reachable rule stop states as well as configurations already in
* a rule stop state.
*
* This is handled before the configurations in skippedStopStates,
* because any configurations potentially added from that list are
* already guaranteed to meet this condition whether or not it's
* required.
*/
reach = RemoveAllConfigsNotInRuleStopState(reach, reach == intermediate);
}
/* If skippedStopStates is not null, then it contains at least one
* configuration. For full-context reach operations, these
* configurations reached the end of the start rule, in which case we
* only add them back to reach if no configuration during the current
* closure operation reached such a state. This ensures adaptivePredict
* chooses an alternative matching the longest overall sequence when
* multiple alternatives are viable.
*/
if (skippedStopStates != null && (!fullCtx || !PredictionMode.HasConfigInRuleStopState(reach.configs)))
{
foreach (ATNConfig c in skippedStopStates)
{
reach.Add(c, mergeCache);
}
}
if (reach.Empty)
return null;
return reach;
}
/**
* Return a configuration set containing only the configurations from
* {@code configSet} which are in a {@link RuleStopState}. If all
* configurations in {@code configSet} are already in a rule stop state, this
* method simply returns {@code configSet}.
*
* <p>When {@code lookToEndOfRule} is true, this method uses
* {@link ATN#nextTokens} for each configuration in {@code configSet} which is
* not already in a rule stop state to see if a rule stop state is reachable
* from the configuration via epsilon-only transitions.</p>
*
* @param configSet the configuration set to update
* @param lookToEndOfRule when true, this method checks for rule stop states
* reachable by epsilon-only transitions from each configuration in
* {@code configSet}.
*
* @return {@code configSet} if all configurations in {@code configSet} are in a
* rule stop state, otherwise return a new configuration set containing only
* the configurations from {@code configSet} which are in a rule stop state
protected ATNConfigSet RemoveAllConfigsNotInRuleStopState(ATNConfigSet configSet, bool lookToEndOfRule)
{
if (PredictionMode.AllConfigsInRuleStopStates(configSet.configs))
{
return configSet;
}
ATNConfigSet result = new ATNConfigSet(configSet.fullCtx);
foreach (ATNConfig config in configSet.configs)
{
if (config.state is RuleStopState)
{
result.Add(config, mergeCache);
continue;
}
if (lookToEndOfRule && config.state.OnlyHasEpsilonTransitions)
{
IntervalSet nextTokens = atn.NextTokens(config.state);
if (nextTokens.Contains(TokenConstants.EPSILON))
{
ATNState endOfRuleState = atn.ruleToStopState[config.state.ruleIndex];
result.Add(new ATNConfig(config, endOfRuleState), mergeCache);
}
}
}
return result;
}
protected ATNConfigSet ComputeStartState(ATNState p,
RuleContext ctx,
bool fullCtx)
{
// always at least the implicit call to start rule
PredictionContext initialContext = PredictionContext.FromRuleContext(atn, ctx);
ATNConfigSet configs = new ATNConfigSet(fullCtx);
for (int i = 0; i < p.NumberOfTransitions; i++)
{
ATNState target = p.Transition(i).target;
ATNConfig c = new ATNConfig(target, i + 1, initialContext);
HashSet<ATNConfig> closureBusy = new HashSet<ATNConfig>();
Closure(c, configs, closureBusy, true, fullCtx, false);
}
return configs;
}
/* parrt internal source braindump that doesn't mess up
* external API spec.