From 1814d27114c941dabc049c54d80ecfc45843734f Mon Sep 17 00:00:00 2001 From: "Jim.Idle" Date: Sat, 13 Aug 2022 17:09:09 +0800 Subject: [PATCH] fix: Fixes for https://github.com/antlr/antlr4/issues/3718 o Implement collections with generics to solve hash collisions o Fix type casting in LL start parser simulation optimization o General minor tidy ups Acknowledgements to @kaby76 for help with tracing errors Signed-off-by: Jim.Idle --- runtime/Go/antlr/atn_config.go | 16 +- runtime/Go/antlr/atn_config_set.go | 31 +++- runtime/Go/antlr/dfa.go | 51 ++---- runtime/Go/antlr/dfa_state.go | 66 ++++++- runtime/Go/antlr/go.mod | 2 +- runtime/Go/antlr/jcollect.go | 167 ++++++++++++++++++ runtime/Go/antlr/jcollect_test.go | 15 ++ runtime/Go/antlr/lexer_atn_simulator.go | 15 +- runtime/Go/antlr/parser.go | 12 +- runtime/Go/antlr/parser_atn_simulator.go | 61 +++---- runtime/Go/antlr/prediction_context.go | 27 +-- runtime/Go/antlr/prediction_mode.go | 64 ++++--- runtime/Go/antlr/utils.go | 2 +- .../antlr/v4/runtime/atn/ATNConfigSet.java | 5 +- 14 files changed, 386 insertions(+), 148 deletions(-) create mode 100644 runtime/Go/antlr/jcollect.go create mode 100644 runtime/Go/antlr/jcollect_test.go diff --git a/runtime/Go/antlr/atn_config.go b/runtime/Go/antlr/atn_config.go index 97ba417f74..604c968caa 100644 --- a/runtime/Go/antlr/atn_config.go +++ b/runtime/Go/antlr/atn_config.go @@ -20,6 +20,8 @@ type comparable interface { type ATNConfig interface { comparable + gequals(other Collectable[ATNConfig]) bool + hash() int GetState() ATNState @@ -47,7 +49,7 @@ type BaseATNConfig struct { reachesIntoOuterContext int } -func NewBaseATNConfig7(old *BaseATNConfig) *BaseATNConfig { // TODO: Dup +func NewBaseATNConfig7(old *BaseATNConfig) ATNConfig { // TODO: Dup return &BaseATNConfig{ state: old.state, alt: old.alt, @@ -134,10 +136,13 @@ func (b *BaseATNConfig) GetReachesIntoOuterContext() int { func (b *BaseATNConfig) SetReachesIntoOuterContext(v int) { b.reachesIntoOuterContext = v } +func (b *BaseATNConfig) equals(o interface{}) bool { + return b.gequals(o.(Collectable[ATNConfig])) +} // An ATN configuration is equal to another if both have the same state, they // predict the same alternative, and syntactic/semantic contexts are the same. -func (b *BaseATNConfig) equals(o interface{}) bool { +func (b *BaseATNConfig) gequals(o Collectable[ATNConfig]) bool { if b == o { return true } @@ -153,7 +158,7 @@ func (b *BaseATNConfig) equals(o interface{}) bool { if b.context == nil { equal = other.context == nil } else { - equal = b.context.equals(other.context) + equal = b.context.gequals(other.context) } var ( @@ -262,6 +267,10 @@ func (l *LexerATNConfig) hash() int { } func (l *LexerATNConfig) equals(other interface{}) bool { + return l.gequals(other.(Collectable[ATNConfig])) +} + +func (l *LexerATNConfig) gequals(other Collectable[ATNConfig]) bool { var othert, ok = other.(*LexerATNConfig) if l == other { @@ -287,7 +296,6 @@ func (l *LexerATNConfig) equals(other interface{}) bool { return l.BaseATNConfig.equals(othert.BaseATNConfig) } - func checkNonGreedyDecision(source *LexerATNConfig, target ATNState) bool { var ds, ok = target.(DecisionState) diff --git a/runtime/Go/antlr/atn_config_set.go b/runtime/Go/antlr/atn_config_set.go index 49ad4a7197..f17b5c2b07 100644 --- a/runtime/Go/antlr/atn_config_set.go +++ b/runtime/Go/antlr/atn_config_set.go @@ -214,6 +214,33 @@ func (b *BaseATNConfigSet) AddAll(coll []ATNConfig) bool { return false } +// Compare is a hack function just to verify that adding DFAstares to the known +// set works, so long as comparison of ATNConfigSet s works. For that to work, we +// need to make sure that the set of ATNConfigs in two sets are equivalent. We can't +// know the order, so we do this inefficient hack. If this proves the point, then +// we can change the config set to a better structure. +func (b *BaseATNConfigSet) Compare(bs *BaseATNConfigSet) bool { + if len(b.configs) != len(bs.configs) { + return false + } + + for _, c := range b.configs { + found := false + for _, c2 := range bs.configs { + if c.equals(c2) { + found = true + break + } + } + + if !found { + return false + } + + } + return true +} + func (b *BaseATNConfigSet) Equals(other interface{}) bool { if b == other { return true @@ -224,12 +251,12 @@ func (b *BaseATNConfigSet) Equals(other interface{}) bool { other2 := other.(*BaseATNConfigSet) return b.configs != nil && - // TODO: b.configs.equals(other2.configs) && // TODO: Is b necessary? b.fullCtx == other2.fullCtx && b.uniqueAlt == other2.uniqueAlt && b.conflictingAlts == other2.conflictingAlts && b.hasSemanticContext == other2.hasSemanticContext && - b.dipsIntoOuterContext == other2.dipsIntoOuterContext + b.dipsIntoOuterContext == other2.dipsIntoOuterContext && + b.Compare(other2) } func (b *BaseATNConfigSet) hash() int { diff --git a/runtime/Go/antlr/dfa.go b/runtime/Go/antlr/dfa.go index d55a2a87d5..26b622c200 100644 --- a/runtime/Go/antlr/dfa.go +++ b/runtime/Go/antlr/dfa.go @@ -4,10 +4,6 @@ package antlr -import ( - "sort" -) - type DFA struct { // atnStartState is the ATN state in which this was created atnStartState DecisionState @@ -15,8 +11,14 @@ type DFA struct { decision int // states is all the DFA states. Use Map to get the old state back; Set can only - // indicate whether it is there. - states map[int]*DFAState + // indicate whether it is there. Go maps implement key hash collisions and so on and are very + // good, but the DFAState is an object and can't be used directly as the key as it can in say JAva + // amd C#, whereby if the hashcode is the same for two objects, then equals() is called against them + // to see if they really are the same object. + // + // + states *JStore[*DFAState, *DFAStateComparator[*DFAState]] + numstates int s0 *DFAState @@ -29,7 +31,7 @@ func NewDFA(atnStartState DecisionState, decision int) *DFA { dfa := &DFA{ atnStartState: atnStartState, decision: decision, - states: make(map[int]*DFAState), + states: NewJStore[*DFAState, *DFAStateComparator[*DFAState]](&DFAStateComparator[*DFAState]{}), } if s, ok := atnStartState.(*StarLoopEntryState); ok && s.precedenceRuleDecision { dfa.precedenceDfa = true @@ -92,7 +94,8 @@ func (d *DFA) getPrecedenceDfa() bool { // true or nil otherwise, and d.precedenceDfa is updated. func (d *DFA) setPrecedenceDfa(precedenceDfa bool) { if d.getPrecedenceDfa() != precedenceDfa { - d.setStates(make(map[int]*DFAState)) + d.states = NewJStore[*DFAState, *DFAStateComparator[*DFAState]](&DFAStateComparator[*DFAState]{}) + d.numstates = 0 if precedenceDfa { precedenceState := NewDFAState(-1, NewBaseATNConfigSet(false)) @@ -117,38 +120,12 @@ func (d *DFA) setS0(s *DFAState) { d.s0 = s } -func (d *DFA) getState(hash int) (*DFAState, bool) { - s, ok := d.states[hash] - return s, ok -} - -func (d *DFA) setStates(states map[int]*DFAState) { - d.states = states -} - -func (d *DFA) setState(hash int, state *DFAState) { - d.states[hash] = state -} - -func (d *DFA) numStates() int { - return len(d.states) -} - -type dfaStateList []*DFAState - -func (d dfaStateList) Len() int { return len(d) } -func (d dfaStateList) Less(i, j int) bool { return d[i].stateNumber < d[j].stateNumber } -func (d dfaStateList) Swap(i, j int) { d[i], d[j] = d[j], d[i] } - // sortedStates returns the states in d sorted by their state number. func (d *DFA) sortedStates() []*DFAState { - vs := make([]*DFAState, 0, len(d.states)) - - for _, v := range d.states { - vs = append(vs, v) - } - sort.Sort(dfaStateList(vs)) + vs := d.states.SortedSlice(func(i, j *DFAState) bool { + return i.stateNumber < j.stateNumber + }) return vs } diff --git a/runtime/Go/antlr/dfa_state.go b/runtime/Go/antlr/dfa_state.go index 970ed19865..eba0997380 100644 --- a/runtime/Go/antlr/dfa_state.go +++ b/runtime/Go/antlr/dfa_state.go @@ -81,6 +81,45 @@ type DFAState struct { predicates []*PredPrediction } +type DFAStateComparator[T Collectable[T]] struct { +} + +// NB This is the same as normal DFAState +func (c *DFAStateComparator[T]) equals(o1, o2 T) bool { + return o1.gequals(o2) +} + +// NB This is the same as normal DFAState +func (c *DFAStateComparator[T]) hash(o T) int { + return o.hash() +} + +// equals returns true if this DFA state is equal to another DFA state. +// Two [DFAState] instances are equal if their ATN configuration sets +// are the same. This method is used to see if a state already exists. +// +// Because the number of alternatives and number of ATN configurations are +// finite, there is a finite number of DFA states that can be processed. +// This is necessary to show that the algorithm terminates. +// +// Cannot test the DFA state numbers here because in [ParserATNSimulator.addDFAState] +// we need to know if any other state exists that has this exact set of ATN configurations. The +// [stateNumber] is irrelevant. +func (d *DFAState) gequals(o Collectable[*DFAState]) bool { + if d == o { + return true + } + + return d.configs.Equals(o.(*DFAState).configs) +} +func (d *DFAState) equals(o interface{}) bool { + if d == o { + return true + } + + return d.configs.Equals(o.(*DFAState).configs) +} + func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState { if configs == nil { configs = NewBaseATNConfigSet(false) @@ -141,15 +180,15 @@ func (d *DFAState) setPrediction(v int) { // Cannot test the DFA state numbers here because in // ParserATNSimulator.addDFAState we need to know if any other state exists that // has d exact set of ATN configurations. The stateNumber is irrelevant. -func (d *DFAState) equals(other interface{}) bool { - if d == other { - return true - } else if _, ok := other.(*DFAState); !ok { - return false - } - - return d.configs.Equals(other.(*DFAState).configs) -} +// func (d *DFAState) equals(other interface{}) bool { +// if d == other { +// return true +// } else if _, ok := other.(*DFAState); !ok { +// return false +// } +// +// return d.configs.Equals(other.(*DFAState).configs) +//} func (d *DFAState) String() string { var s string @@ -169,3 +208,12 @@ func (d *DFAState) hash() int { h = murmurUpdate(h, d.configs.hash()) return murmurFinish(h, 1) } + +func (d *DFAState) Equals(o *DFAState) bool { + + if d == o { + return true + } + + return d.configs.Equals(o.configs) +} diff --git a/runtime/Go/antlr/go.mod b/runtime/Go/antlr/go.mod index 004cf34655..6281e331eb 100644 --- a/runtime/Go/antlr/go.mod +++ b/runtime/Go/antlr/go.mod @@ -1,3 +1,3 @@ module github.com/antlr/antlr4/runtime/Go/antlr -go 1.16 +go 1.18 diff --git a/runtime/Go/antlr/jcollect.go b/runtime/Go/antlr/jcollect.go new file mode 100644 index 0000000000..bdbbf8241c --- /dev/null +++ b/runtime/Go/antlr/jcollect.go @@ -0,0 +1,167 @@ +package antlr + +import "sort" + +// Collectable is an interface that a struct should implement if it is to be +// usable as a key in this collection. Cannot use the intuitive equals function +// here because the non-generic runtime has already claimed it. +type Collectable[T any] interface { + hash() int + gequals(other Collectable[T]) bool +} + +type Comparator[T any] interface { + hash(o T) int + equals(T, T) bool +} + +// JStore implements a container that allows the use of a struct to calculate the key +// for a collection of values akin to map. This is not meant to be a full-blown HashMap but just +// serve the needs of the ANTLR Go runtime. +// +// For ease of porting the logic of the runtime from the master target (Java), this collection +// operates in a similar way to Java, in that it can use any struct that supplies a hash() and equals() +// function as the key. The values are stored in a standard go map which internally is a form of hashmap +// itself, the key for the go map is the hash supplied by the key object. The collection is able to deal with +// hash conflicts by using a simple slice of values associated with the hash code indexed bucket. That isn't +// particularly efficient, but it is simple, and it works. As this is specifically for the ANTLR runtime, and +// we understand the requirements, then this is fine - this is not a general purpose collection. +type JStore[T any, C Comparator[T]] struct { + store map[int][]T + len int + comparator Comparator[T] +} + +func NewJStore[T any, C Comparator[T]](comparator Comparator[T]) *JStore[T, C] { + + if comparator == nil { + panic("comparator cannot be nil") + } + + s := &JStore[T, C]{ + store: make(map[int][]T), + comparator: comparator, + } + return s +} + +// Put will store given value in the collection. Note that the key for storage is generated from +// the value itself - this is specifically because that is what ANTLR needs - this would not be useful +// as any kind of general collection. +// +// If the key has a hash conflict, then the value will be added to the slice of values associated with the +// hash, unless the value is already in the slice, in which case the existing value is returned. Value equivalence is +// tested by calling the equals() method on the key. +// +// # If the given value is already present in the store, then the existing value is returned as v and exists is set to true +// +// If the given value is not present in the store, then the value is added to the store and returned as v and exists is set to false. +func (s *JStore[T, C]) Put(value T) (v T, exists bool) { //nolint:ireturn + + kh := s.comparator.hash(value) + + for _, v := range s.store[kh] { + if s.comparator.equals(value, v) { + return v, true + } + } + s.store[kh] = append(s.store[kh], value) + s.len++ + return value, false +} + +// Get will return the value associated with the key - the type of the key is the same type as the value +// which would not generally be useful, but this is a specific thing for ANTLR where the key is +// generated using the object we are going to store. +func (s *JStore[T, C]) Get(key T) (T, bool) { //nolint:ireturn + + kh := s.comparator.hash(key) + + for _, v := range s.store[kh] { + if s.comparator.equals(key, v) { + return v, true + } + } + return key, false +} + +func (s *JStore[T, C]) SortedSlice(less func(i, j T) bool) []T { + vs := make([]T, 0, len(s.store)) + for _, v := range s.store { + vs = append(vs, v...) + } + sort.Slice(vs, func(i, j int) bool { + return less(vs[i], vs[j]) + }) + + return vs +} + +func (s *JStore[T, C]) Len() int { + return s.len +} + +type entry[K, V any] struct { + key K + val V +} + +type JMap[K, V any, C Comparator[K]] struct { + store map[int][]*entry[K, V] + len int + comparator Comparator[K] +} + +func NewJMap[K, V any, C Comparator[K]](comparator Comparator[K]) *JMap[K, V, C] { + return &JMap[K, V, C]{ + store: make(map[int][]*entry[K, V]), + comparator: comparator, + } +} + +func (m *JMap[K, V, C]) Put(key K, val V) { + kh := m.comparator.hash(key) + m.store[kh] = append(m.store[kh], &entry[K, V]{key, val}) + m.len++ +} + +func (m *JMap[K, V, C]) Values() []V { + vs := make([]V, 0, len(m.store)) + for _, e := range m.store { + for _, v := range e { + vs = append(vs, v.val) + } + } + return vs +} + +func (m *JMap[K, V, C]) Get(key K) (V, bool) { + + var none V + kh := m.comparator.hash(key) + for _, e := range m.store[kh] { + if m.comparator.equals(e.key, key) { + return e.val, true + } + } + return none, false +} + +func (m *JMap[K, V, C]) Len() int { + return len(m.store) +} + +func (m *JMap[K, V, C]) Delete(key K) { + kh := m.comparator.hash(key) + for i, e := range m.store[kh] { + if m.comparator.equals(e.key, key) { + m.store[kh] = append(m.store[kh][:i], m.store[kh][i+1:]...) + m.len-- + return + } + } +} + +func (m *JMap[K, V, C]) Clear() { + m.store = make(map[int][]*entry[K, V]) +} diff --git a/runtime/Go/antlr/jcollect_test.go b/runtime/Go/antlr/jcollect_test.go new file mode 100644 index 0000000000..816307a02c --- /dev/null +++ b/runtime/Go/antlr/jcollect_test.go @@ -0,0 +1,15 @@ +package antlr + +import "testing" + +func Test_try(t *testing.T) { + tests := []struct { + name string + }{ + {"Test_try"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + }) + } +} diff --git a/runtime/Go/antlr/lexer_atn_simulator.go b/runtime/Go/antlr/lexer_atn_simulator.go index dc05153ea4..a356893796 100644 --- a/runtime/Go/antlr/lexer_atn_simulator.go +++ b/runtime/Go/antlr/lexer_atn_simulator.go @@ -591,19 +591,24 @@ func (l *LexerATNSimulator) addDFAState(configs ATNConfigSet, suppressEdge bool) proposed.lexerActionExecutor = firstConfigWithRuleStopState.(*LexerATNConfig).lexerActionExecutor proposed.setPrediction(l.atn.ruleToTokenType[firstConfigWithRuleStopState.GetState().GetRuleIndex()]) } - hash := proposed.hash() dfa := l.decisionToDFA[l.mode] l.atn.stateMu.Lock() defer l.atn.stateMu.Unlock() - existing, ok := dfa.getState(hash) - if ok { + existing, present := dfa.states.Put(proposed) + if present { + + // This state was already present, so just return it. + // proposed = existing } else { - proposed.stateNumber = dfa.numStates() + + // The proposed state has already been added to the DFA. We still have the pointer, so + // we can modify it even though it is stored already. + // + proposed.stateNumber = dfa.states.Len() configs.SetReadOnly(true) proposed.configs = configs - dfa.setState(hash, proposed) } if !suppressEdge { dfa.setS0(proposed) diff --git a/runtime/Go/antlr/parser.go b/runtime/Go/antlr/parser.go index 2ab2f56052..e1d00d2155 100644 --- a/runtime/Go/antlr/parser.go +++ b/runtime/Go/antlr/parser.go @@ -91,7 +91,6 @@ func NewBaseParser(input TokenStream) *BaseParser { // bypass alternatives. // // @see ATNDeserializationOptions//isGenerateRuleBypassTransitions() -// var bypassAltsAtnCache = make(map[string]int) // reset the parser's state// @@ -230,7 +229,6 @@ func (p *BaseParser) GetParseListeners() []ParseTreeListener { // @param listener the listener to add // // @panics nilPointerException if {@code} listener is {@code nil} -// func (p *BaseParser) AddParseListener(listener ParseTreeListener) { if listener == nil { panic("listener") @@ -241,13 +239,11 @@ func (p *BaseParser) AddParseListener(listener ParseTreeListener) { p.parseListeners = append(p.parseListeners, listener) } -// // Remove {@code listener} from the list of parse listeners. // //

If {@code listener} is {@code nil} or has not been added as a parse // listener, p.method does nothing.

// @param listener the listener to remove -// func (p *BaseParser) RemoveParseListener(listener ParseTreeListener) { if p.parseListeners != nil { @@ -289,11 +285,9 @@ func (p *BaseParser) TriggerEnterRuleEvent() { } } -// // Notify any parse listeners of an exit rule event. // // @see //addParseListener -// func (p *BaseParser) TriggerExitRuleEvent() { if p.parseListeners != nil { // reverse order walk of listeners @@ -330,7 +324,6 @@ func (p *BaseParser) setTokenFactory(factory TokenFactory) { // // @panics UnsupportedOperationException if the current parser does not // implement the {@link //getSerializedATN()} method. -// func (p *BaseParser) GetATNWithBypassAlts() { // TODO @@ -402,7 +395,6 @@ func (p *BaseParser) SetTokenStream(input TokenStream) { // Match needs to return the current input symbol, which gets put // into the label for the associated token ref e.g., x=ID. -// func (p *BaseParser) GetCurrentToken() Token { return p.input.LT(1) } @@ -624,7 +616,6 @@ func (p *BaseParser) IsExpectedToken(symbol int) bool { // respectively. // // @see ATN//getExpectedTokens(int, RuleContext) -// func (p *BaseParser) GetExpectedTokens() *IntervalSet { return p.Interpreter.atn.getExpectedTokens(p.state, p.ctx) } @@ -686,7 +677,7 @@ func (p *BaseParser) GetDFAStrings() string { func (p *BaseParser) DumpDFA() { seenOne := false for _, dfa := range p.Interpreter.decisionToDFA { - if dfa.numStates() > 0 { + if dfa.states.Len() > 0 { if seenOne { fmt.Println() } @@ -703,7 +694,6 @@ func (p *BaseParser) GetSourceName() string { // During a parse is sometimes useful to listen in on the rule entry and exit // events as well as token Matches. p.is for quick and dirty debugging. -// func (p *BaseParser) SetTrace(trace *TraceListener) { if trace == nil { p.RemoveParseListener(p.tracer) diff --git a/runtime/Go/antlr/parser_atn_simulator.go b/runtime/Go/antlr/parser_atn_simulator.go index 01d50e1416..cd725167a5 100644 --- a/runtime/Go/antlr/parser_atn_simulator.go +++ b/runtime/Go/antlr/parser_atn_simulator.go @@ -174,12 +174,12 @@ func (p *ParserATNSimulator) AdaptivePredict(input TokenStream, decision int, ou // Reporting insufficient predicates // cover these cases: -// dead end -// single alt -// single alt + preds -// conflict -// conflict + preds // +// dead end +// single alt +// single alt + preds +// conflict +// conflict + preds func (p *ParserATNSimulator) execATN(dfa *DFA, s0 *DFAState, input TokenStream, startIndex int, outerContext ParserRuleContext) int { if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { @@ -617,7 +617,6 @@ func (p *ParserATNSimulator) computeReachSet(closure ATNConfigSet, t int, fullCt return reach } -// // Return a configuration set containing only the configurations from // {@code configs} which are in a {@link RuleStopState}. If all // configurations in {@code configs} are already in a rule stop state, p @@ -636,7 +635,6 @@ func (p *ParserATNSimulator) computeReachSet(closure ATNConfigSet, t int, fullCt // @return {@code configs} if all configurations in {@code configs} are in a // rule stop state, otherwise return a Newconfiguration set containing only // the configurations from {@code configs} which are in a rule stop state -// func (p *ParserATNSimulator) removeAllConfigsNotInRuleStopState(configs ATNConfigSet, lookToEndOfRule bool) ATNConfigSet { if PredictionModeallConfigsInRuleStopStates(configs) { return configs @@ -671,7 +669,6 @@ func (p *ParserATNSimulator) computeStartState(a ATNState, ctx RuleContext, full return configs } -// // This method transforms the start state computed by // {@link //computeStartState} to the special start state used by a // precedence DFA for a particular precedence value. The transformation @@ -726,7 +723,6 @@ func (p *ParserATNSimulator) computeStartState(a ATNState, ctx RuleContext, full // @return The transformed configuration set representing the start state // for a precedence DFA at a particular precedence level (determined by // calling {@link Parser//getPrecedence}). -// func (p *ParserATNSimulator) applyPrecedenceFilter(configs ATNConfigSet) ATNConfigSet { statesFromAlt1 := make(map[int]PredictionContext) @@ -760,7 +756,7 @@ func (p *ParserATNSimulator) applyPrecedenceFilter(configs ATNConfigSet) ATNConf // (basically a graph subtraction algorithm). if !config.getPrecedenceFilterSuppressed() { context := statesFromAlt1[config.GetState().GetStateNumber()] - if context != nil && context.equals(config.GetContext()) { + if context != nil && context.gequals(config.GetContext()) { // eliminated continue } @@ -824,7 +820,6 @@ func (p *ParserATNSimulator) getPredicatePredictions(ambigAlts *BitSet, altToPre return pairs } -// // This method is used to improve the localization of error messages by // choosing an alternative rather than panicing a // {@link NoViableAltException} in particular prediction scenarios where the @@ -869,7 +864,6 @@ func (p *ParserATNSimulator) getPredicatePredictions(ambigAlts *BitSet, altToPre // @return The value to return from {@link //AdaptivePredict}, or // {@link ATN//INVALID_ALT_NUMBER} if a suitable alternative was not // identified and {@link //AdaptivePredict} should Report an error instead. -// func (p *ParserATNSimulator) getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs ATNConfigSet, outerContext ParserRuleContext) int { cfgs := p.splitAccordingToSemanticValidity(configs, outerContext) semValidConfigs := cfgs[0] @@ -938,11 +932,11 @@ func (p *ParserATNSimulator) splitAccordingToSemanticValidity(configs ATNConfigS } // Look through a list of predicate/alt pairs, returning alts for the -// pairs that win. A {@code NONE} predicate indicates an alt containing an -// unpredicated config which behaves as "always true." If !complete -// then we stop at the first predicate that evaluates to true. This -// includes pairs with nil predicates. // +// pairs that win. A {@code NONE} predicate indicates an alt containing an +// unpredicated config which behaves as "always true." If !complete +// then we stop at the first predicate that evaluates to true. This +// includes pairs with nil predicates. func (p *ParserATNSimulator) evalSemanticContext(predPredictions []*PredPrediction, outerContext ParserRuleContext, complete bool) *BitSet { predictions := NewBitSet() for i := 0; i < len(predPredictions); i++ { @@ -1104,7 +1098,16 @@ func (p *ParserATNSimulator) canDropLoopEntryEdgeInLeftRecursiveRule(config ATNC // left-recursion elimination. For efficiency, also check if // the context has an empty stack case. If so, it would mean // global FOLLOW so we can't perform optimization - if startLoop, ok := _p.(StarLoopEntryState); !ok || !startLoop.precedenceRuleDecision || config.GetContext().isEmpty() || config.GetContext().hasEmptyPath() { + if _p.GetStateType() != ATNStateStarLoopEntry { + return false + } + startLoop, ok := _p.(*StarLoopEntryState) + if !ok { + return false + } + if !startLoop.precedenceRuleDecision || + config.GetContext().isEmpty() || + config.GetContext().hasEmptyPath() { return false } @@ -1117,8 +1120,8 @@ func (p *ParserATNSimulator) canDropLoopEntryEdgeInLeftRecursiveRule(config ATNC return false } } - - decisionStartState := _p.(BlockStartState).GetTransitions()[0].getTarget().(BlockStartState) + x := _p.GetTransitions()[0].getTarget() + decisionStartState := x.(BlockStartState) blockEndStateNum := decisionStartState.getEndState().stateNumber blockEndState := p.atn.states[blockEndStateNum].(*BlockEndState) @@ -1372,9 +1375,9 @@ func (p *ParserATNSimulator) getLookaheadName(input TokenStream) string { } // Used for debugging in AdaptivePredict around execATN but I cut -// it out for clarity now that alg. works well. We can leave p -// "dead" code for a bit. // +// it out for clarity now that alg. works well. We can leave p +// "dead" code for a bit. func (p *ParserATNSimulator) dumpDeadEndConfigs(nvae *NoViableAltException) { panic("Not implemented") @@ -1421,7 +1424,6 @@ func (p *ParserATNSimulator) getUniqueAlt(configs ATNConfigSet) int { return alt } -// // Add an edge to the DFA, if possible. This method calls // {@link //addDFAState} to ensure the {@code to} state is present in the // DFA. If {@code from} is {@code nil}, or if {@code t} is outside the @@ -1440,7 +1442,6 @@ func (p *ParserATNSimulator) getUniqueAlt(configs ATNConfigSet) int { // @return If {@code to} is {@code nil}, p method returns {@code nil} // otherwise p method returns the result of calling {@link //addDFAState} // on {@code to} -// func (p *ParserATNSimulator) addDFAEdge(dfa *DFA, from *DFAState, t int, to *DFAState) *DFAState { if ParserATNSimulatorDebug { fmt.Println("EDGE " + from.String() + " -> " + to.String() + " upon " + p.GetTokenName(t)) @@ -1472,7 +1473,6 @@ func (p *ParserATNSimulator) addDFAEdge(dfa *DFA, from *DFAState, t int, to *DFA return to } -// // Add state {@code D} to the DFA if it is not already present, and return // the actual instance stored in the DFA. If a state equivalent to {@code D} // is already in the DFA, the existing state is returned. Otherwise p @@ -1486,25 +1486,26 @@ func (p *ParserATNSimulator) addDFAEdge(dfa *DFA, from *DFAState, t int, to *DFA // @return The state stored in the DFA. This will be either the existing // state if {@code D} is already in the DFA, or {@code D} itself if the // state was not already present. -// func (p *ParserATNSimulator) addDFAState(dfa *DFA, d *DFAState) *DFAState { if d == ATNSimulatorError { return d } - hash := d.hash() - existing, ok := dfa.getState(hash) - if ok { + existing, present := dfa.states.Put(d) + if present { return existing } - d.stateNumber = dfa.numStates() + + // The state was not present, so update it with configs + // + d.stateNumber = dfa.states.Len() - 1 if !d.configs.ReadOnly() { d.configs.OptimizeConfigs(p.BaseATNSimulator) d.configs.SetReadOnly(true) } - dfa.setState(hash, d) if ParserATNSimulatorDebug { fmt.Println("adding NewDFA state: " + d.String()) } + return d } diff --git a/runtime/Go/antlr/prediction_context.go b/runtime/Go/antlr/prediction_context.go index f85d202a4a..d388d7ca31 100644 --- a/runtime/Go/antlr/prediction_context.go +++ b/runtime/Go/antlr/prediction_context.go @@ -29,7 +29,7 @@ type PredictionContext interface { hash() int GetParent(int) PredictionContext getReturnState(int) int - equals(PredictionContext) bool + gequals(Collectable[PredictionContext]) bool length() int isEmpty() bool hasEmptyPath() bool @@ -86,7 +86,6 @@ func NewPredictionContextCache() *PredictionContextCache { // Add a context to the cache and return it. If the context already exists, // return that one instead and do not add a Newcontext to the cache. // Protect shared cache from unsafe thread access. -// func (p *PredictionContextCache) add(ctx PredictionContext) PredictionContext { if ctx == BasePredictionContextEMPTY { return BasePredictionContextEMPTY @@ -159,8 +158,11 @@ func (b *BaseSingletonPredictionContext) getReturnState(index int) int { func (b *BaseSingletonPredictionContext) hasEmptyPath() bool { return b.returnState == BasePredictionContextEmptyReturnState } +func (b *BaseSingletonPredictionContext) equals(other interface{}) bool { + return b.gequals(other.(Collectable[PredictionContext])) +} -func (b *BaseSingletonPredictionContext) equals(other PredictionContext) bool { +func (b *BaseSingletonPredictionContext) gequals(other Collectable[PredictionContext]) bool { if b == other { return true } else if _, ok := other.(*BaseSingletonPredictionContext); !ok { @@ -171,13 +173,13 @@ func (b *BaseSingletonPredictionContext) equals(other PredictionContext) bool { otherP := other.(*BaseSingletonPredictionContext) - if b.returnState != other.getReturnState(0) { + if b.returnState != otherP.getReturnState(0) { return false } else if b.parentCtx == nil { return otherP.parentCtx == nil } - return b.parentCtx.equals(otherP.parentCtx) + return b.parentCtx.gequals(otherP.parentCtx) } func (b *BaseSingletonPredictionContext) hash() int { @@ -230,8 +232,11 @@ func (e *EmptyPredictionContext) GetParent(index int) PredictionContext { func (e *EmptyPredictionContext) getReturnState(index int) int { return e.returnState } +func (e *EmptyPredictionContext) equals(other Collectable[PredictionContext]) bool { + return e == other +} -func (e *EmptyPredictionContext) equals(other PredictionContext) bool { +func (e *EmptyPredictionContext) gequals(other Collectable[PredictionContext]) bool { return e == other } @@ -298,7 +303,11 @@ func (a *ArrayPredictionContext) getReturnState(index int) int { return a.returnStates[index] } -func (a *ArrayPredictionContext) equals(other PredictionContext) bool { +func (a *ArrayPredictionContext) equals(other interface{}) bool { + return a.gequals(other.(*ArrayPredictionContext)) +} + +func (a *ArrayPredictionContext) gequals(other Collectable[PredictionContext]) bool { if _, ok := other.(*ArrayPredictionContext); !ok { return false } else if a.cachedHash != other.hash() { @@ -390,7 +399,6 @@ func merge(a, b PredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) return mergeArrays(a.(*ArrayPredictionContext), b.(*ArrayPredictionContext), rootIsWildcard, mergeCache) } -// // Merge two {@link SingletonBasePredictionContext} instances. // //

Stack tops equal, parents merge is same return left graph.
@@ -499,7 +507,6 @@ func mergeSingletons(a, b *BaseSingletonPredictionContext, rootIsWildcard bool, return apc } -// // Handle case where at least one of {@code a} or {@code b} is // {@link //EMPTY}. In the following diagrams, the symbol {@code $} is used // to represent {@link //EMPTY}. @@ -561,7 +568,6 @@ func mergeRoot(a, b SingletonPredictionContext, rootIsWildcard bool) PredictionC return nil } -// // Merge two {@link ArrayBasePredictionContext} instances. // //

Different tops, different parents.
@@ -683,7 +689,6 @@ func mergeArrays(a, b *ArrayPredictionContext, rootIsWildcard bool, mergeCache * return M } -// // Make pass over all M {@code parents} merge any {@code equals()} // ones. // / diff --git a/runtime/Go/antlr/prediction_mode.go b/runtime/Go/antlr/prediction_mode.go index 15718f912b..eca8ddc1a1 100644 --- a/runtime/Go/antlr/prediction_mode.go +++ b/runtime/Go/antlr/prediction_mode.go @@ -70,7 +70,6 @@ const ( PredictionModeLLExactAmbigDetection = 2 ) -// // Computes the SLL prediction termination condition. // //

@@ -108,9 +107,9 @@ const ( // The single-alt-state thing lets prediction continue upon rules like // (otherwise, it would admit defeat too soon):

// -//

{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) '' }

+//

{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ” }

// -//

When the ATN simulation reaches the state before {@code ''}, it has a +//

When the ATN simulation reaches the state before {@code ”}, it has a // DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally // {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop // processing this node because alternative to has another way to continue, @@ -152,16 +151,15 @@ const ( // //

Before testing these configurations against others, we have to merge // {@code x} and {@code x'} (without modifying the existing configurations). -// For example, we test {@code (x+x')==x''} when looking for conflicts in +// For example, we test {@code (x+x')==x”} when looking for conflicts in // the following configurations.

// -//

{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}

+//

{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x”, {})}

// //

If the configuration set has predicates (as indicated by // {@link ATNConfigSet//hasSemanticContext}), this algorithm makes a copy of // the configurations to strip out all of the predicates so that a standard // {@link ATNConfigSet} will merge everything ignoring predicates.

-// func PredictionModehasSLLConflictTerminatingPrediction(mode int, configs ATNConfigSet) bool { // Configs in rule stop states indicate reaching the end of the decision // rule (local context) or end of start rule (full context). If all @@ -229,7 +227,6 @@ func PredictionModeallConfigsInRuleStopStates(configs ATNConfigSet) bool { return true } -// // Full LL prediction termination. // //

Can we stop looking ahead during ATN simulation or is there some @@ -334,7 +331,7 @@ func PredictionModeallConfigsInRuleStopStates(configs ATNConfigSet) bool { // // //

  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, -// {@code (s', 2, y)}, {@code (s'', 1, z)} yields non-conflicting set +// {@code (s', 2, y)}, {@code (s”, 1, z)} yields non-conflicting set // {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = // {@code {1}} => stop and predict 1
  • // @@ -369,31 +366,26 @@ func PredictionModeallConfigsInRuleStopStates(configs ATNConfigSet) bool { // two or one and three so we keep going. We can only stop prediction when // we need exact ambiguity detection when the sets look like // {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...

    -// func PredictionModeresolvesToJustOneViableAlt(altsets []*BitSet) int { return PredictionModegetSingleViableAlt(altsets) } -// // Determines if every alternative subset in {@code altsets} contains more // than one alternative. // // @param altsets a collection of alternative subsets // @return {@code true} if every {@link BitSet} in {@code altsets} has // {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} -// func PredictionModeallSubsetsConflict(altsets []*BitSet) bool { return !PredictionModehasNonConflictingAltSet(altsets) } -// // Determines if any single alternative subset in {@code altsets} contains // exactly one alternative. // // @param altsets a collection of alternative subsets // @return {@code true} if {@code altsets} contains a {@link BitSet} with // {@link BitSet//cardinality cardinality} 1, otherwise {@code false} -// func PredictionModehasNonConflictingAltSet(altsets []*BitSet) bool { for i := 0; i < len(altsets); i++ { alts := altsets[i] @@ -404,14 +396,12 @@ func PredictionModehasNonConflictingAltSet(altsets []*BitSet) bool { return false } -// // Determines if any single alternative subset in {@code altsets} contains // more than one alternative. // // @param altsets a collection of alternative subsets // @return {@code true} if {@code altsets} contains a {@link BitSet} with // {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} -// func PredictionModehasConflictingAltSet(altsets []*BitSet) bool { for i := 0; i < len(altsets); i++ { alts := altsets[i] @@ -422,13 +412,11 @@ func PredictionModehasConflictingAltSet(altsets []*BitSet) bool { return false } -// // Determines if every alternative subset in {@code altsets} is equivalent. // // @param altsets a collection of alternative subsets // @return {@code true} if every member of {@code altsets} is equal to the // others, otherwise {@code false} -// func PredictionModeallSubsetsEqual(altsets []*BitSet) bool { var first *BitSet @@ -444,13 +432,11 @@ func PredictionModeallSubsetsEqual(altsets []*BitSet) bool { return true } -// // Returns the unique alternative predicted by all alternative subsets in // {@code altsets}. If no such alternative exists, this method returns // {@link ATN//INVALID_ALT_NUMBER}. // // @param altsets a collection of alternative subsets -// func PredictionModegetUniqueAlt(altsets []*BitSet) int { all := PredictionModeGetAlts(altsets) if all.length() == 1 { @@ -466,7 +452,6 @@ func PredictionModegetUniqueAlt(altsets []*BitSet) int { // // @param altsets a collection of alternative subsets // @return the set of represented alternatives in {@code altsets} -// func PredictionModeGetAlts(altsets []*BitSet) *BitSet { all := NewBitSet() for _, alts := range altsets { @@ -475,7 +460,28 @@ func PredictionModeGetAlts(altsets []*BitSet) *BitSet { return all } -// +type ATNComparator[T Collectable[T]] struct { +} + +// NB This is the same as normal DFAState +func (a *ATNComparator[T]) equals(o1, o2 ATNConfig) bool { + if o1 == o2 { + return true + } + if o1 != nil && o2 != nil { + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() + } + return false +} + +// NB This is the same as normal DFAState +func (a *ATNComparator[T]) hash(o ATNConfig) int { + h := murmurInit(7) + h = murmurUpdate(h, o.GetState().GetStateNumber()) + h = murmurUpdate(h, o.GetContext().hash()) + return murmurFinish(h, 2) +} + // This func gets the conflicting alt subsets from a configuration set. // For each configuration {@code c} in {@code configs}: // @@ -483,36 +489,28 @@ func PredictionModeGetAlts(altsets []*BitSet) *BitSet { // map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not // alt and not pred // -// func PredictionModegetConflictingAltSubsets(configs ATNConfigSet) []*BitSet { - configToAlts := make(map[int]*BitSet) + configToAlts := NewJMap[ATNConfig, *BitSet, *ATNComparator[ATNConfig]](&ATNComparator[ATNConfig]{}) for _, c := range configs.GetItems() { - key := 31 * c.GetState().GetStateNumber() + c.GetContext().hash() - alts, ok := configToAlts[key] + alts, ok := configToAlts.Get(c) if !ok { alts = NewBitSet() - configToAlts[key] = alts + configToAlts.Put(c, alts) } alts.add(c.GetAlt()) } - values := make([]*BitSet, 0, 10) - for _, v := range configToAlts { - values = append(values, v) - } - return values + return configToAlts.Values() } -// // Get a map from state to alt subset from a configuration set. For each // configuration {@code c} in {@code configs}: // //
     // map[c.{@link ATNConfig//state state}] U= c.{@link ATNConfig//alt alt}
     // 
    -// func PredictionModeGetStateToAltMap(configs ATNConfigSet) *AltDict { m := NewAltDict() diff --git a/runtime/Go/antlr/utils.go b/runtime/Go/antlr/utils.go index ec219df983..8d8a029782 100644 --- a/runtime/Go/antlr/utils.go +++ b/runtime/Go/antlr/utils.go @@ -171,7 +171,7 @@ func (b *BitSet) equals(other interface{}) bool { // We only compare set bits, so we cannot rely on the two slices having the same size. Its // possible for two BitSets to have different slice lengths but the same set bits. So we only - // compare the relavent words and ignore the trailing zeros. + // compare the relevant words and ignore the trailing zeros. bLen := b.minLen() otherLen := otherBitSet.minLen() diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java index 5c40e327e9..b2dfa3a9b3 100755 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfigSet.java @@ -74,10 +74,7 @@ public boolean equals(ATNConfig a, ATNConfig b) { * All configs but hashed by (s, i, _, pi) not including context. Wiped out * when we go readonly as this set becomes a DFA state. */ - public AbstractConfigHashSet configLookup; - - /** Track the elements as they are added to the set; supports get(i) */ - public final ArrayList configs = new ArrayList(7); + public AbstractConfigHashSet configLo // TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation // TODO: can we track conflicts as they are added to save scanning configs later?