diff --git a/Makefile b/Makefile index ddf0f50..2e51734 100644 --- a/Makefile +++ b/Makefile @@ -167,11 +167,66 @@ $(TEST_DIR)/issue_70/optimized-grammar/issue_70.go: $(TEST_DIR)/issue_70/issue_7 $(BINDIR)/pigeon -nolint -optimize-grammar $< > $@ $(TEST_DIR)/issue_70b/issue_70b.go: $(TEST_DIR)/issue_70b/issue_70b.peg $(BINDIR)/pigeon - $(BINDIR)/pigeon -nolint --optimize-grammar $< > $@ + $(BINDIR)/pigeon -nolint -optimize-grammar -support-left-recursion $< > $@ + +$(TEST_DIR)/issue_79/issue_79.go: $(TEST_DIR)/issue_79/issue_79.peg $(BINDIR)/pigeon + @! $(BINDIR)/pigeon $< > $@ 2>/dev/null && exit 0 || echo "failure, expect build to fail due to left recursion!" && exit 1 + $(BINDIR)/pigeon -support-left-recursion $< > $@ $(TEST_DIR)/issue_80/issue_80.go: $(TEST_DIR)/issue_80/issue_80.peg $(BINDIR)/pigeon $(BINDIR)/pigeon -nolint $< > $@ +$(TEST_DIR)/left_recursion/left_recursion.go: \ + $(TEST_DIR)/left_recursion/standart/leftrecursion/left_recursion.go \ + $(TEST_DIR)/left_recursion/optimized/leftrecursion/left_recursion.go \ + $(BINDIR)/pigeon + +$(TEST_DIR)/left_recursion/standart/leftrecursion/left_recursion.go: \ + $(TEST_DIR)/left_recursion/left_recursion.peg $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -support-left-recursion $< > $@ + +$(TEST_DIR)/left_recursion/optimized/leftrecursion/left_recursion.go: \ + $(TEST_DIR)/left_recursion/left_recursion.peg $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -optimize-parser -support-left-recursion $< > $@ + +$(TEST_DIR)/left_recursion/without_left_recursion.go: \ + $(TEST_DIR)/left_recursion/standart/withoutleftrecursion/without_left_recursion.go \ + $(TEST_DIR)/left_recursion/optimized/withoutleftrecursion/without_left_recursion.go \ + $(BINDIR)/pigeon + +$(TEST_DIR)/left_recursion/standart/withoutleftrecursion/without_left_recursion.go: \ + $(TEST_DIR)/left_recursion/without_left_recursion.peg \ + $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint $< > $@ + 
+$(TEST_DIR)/left_recursion/optimized/withoutleftrecursion/without_left_recursion.go: \ + $(TEST_DIR)/left_recursion/without_left_recursion.peg \ + $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -optimize-parser $< > $@ + +$(TEST_DIR)/left_recursion_state/left_recursion_state.go: \ + $(TEST_DIR)/left_recursion_state/standart/left_recursion_state.go \ + $(TEST_DIR)/left_recursion_state/optimized/left_recursion_state.go \ + $(BINDIR)/pigeon + +$(TEST_DIR)/left_recursion_state/standart/left_recursion_state.go: \ + $(TEST_DIR)/left_recursion_state/left_recursion_state.peg $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -support-left-recursion $< > $@ + +$(TEST_DIR)/left_recursion_state/optimized/left_recursion_state.go: \ + $(TEST_DIR)/left_recursion_state/left_recursion_state.peg $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -optimize-parser -support-left-recursion $< > $@ + +$(TEST_DIR)/left_recursion_labeled_failures/left_recursion_labeled_failures.go: \ + $(TEST_DIR)/left_recursion_labeled_failures/left_recursion_labeled_failures.peg \ + $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -support-left-recursion $< > $@ + +$(TEST_DIR)/left_recursion_thrownrecover/left_recursion_thrownrecover.go: \ + $(TEST_DIR)/left_recursion_thrownrecover/left_recursion_thrownrecover.peg \ + $(BINDIR)/pigeon + $(BINDIR)/pigeon -nolint -support-left-recursion $< > $@ + lint: golint ./... go vet ./... diff --git a/THIRD-PARTY-NOTICES b/THIRD-PARTY-NOTICES new file mode 100644 index 0000000..14da9e1 --- /dev/null +++ b/THIRD-PARTY-NOTICES @@ -0,0 +1,25 @@ +---------------------------------------------------------------------- +License notice for github.com/stretchr/testify +---------------------------------------------------------------------- + +MIT License + +Copyright (c) 2012-2020 Mat Ryer, Tyler Bunnell and contributors. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ast/ast.go b/ast/ast.go index c34b7d1..13d34b7 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -35,6 +35,8 @@ type Grammar struct { Rules []*Rule } +var _ Expression = (*Grammar)(nil) + // NewGrammar creates a new grammar at the specified position. func NewGrammar(p Pos) *Grammar { return &Grammar{p: p} @@ -56,6 +58,21 @@ func (g *Grammar) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable. +func (g *Grammar) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Grammar") +} + +// IsNullable returns the nullable attribute of the node. +func (g *Grammar) IsNullable() bool { + panic("IsNullable should not be called on the Grammar") +} + +// InitialNames returns names of nodes with which an expression can begin. 
+func (g *Grammar) InitialNames() map[string]struct{} { + panic("InitialNames should not be called on the Grammar") +} + // Rule represents a rule in the PEG grammar. It has a name, an optional // display name to be used in error messages, and an expression. type Rule struct { @@ -63,8 +80,16 @@ type Rule struct { Name *Identifier DisplayName *StringLit Expr Expression + + // The fields below are used for left recursion handling. + Visited bool + Nullable bool + LeftRecursive bool + Leader bool } +var _ Expression = (*Rule)(nil) + // NewRule creates a rule with at the specified position and with the // specified name as identifier. func NewRule(p Pos, name *Identifier) *Rule { @@ -80,9 +105,36 @@ func (r *Rule) String() string { r.p, r, r.Name, r.DisplayName, r.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (r *Rule) NullableVisit(rules map[string]*Rule) bool { + if r.Visited { + // A left-recursive rule is considered non-nullable. + return false + } + r.Visited = true + r.Nullable = r.Expr.NullableVisit(rules) + r.Visited = false + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node. +func (r *Rule) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin. +func (r *Rule) InitialNames() map[string]struct{} { + return r.Expr.InitialNames() +} + // Expression is the interface implemented by all expression types. type Expression interface { Pos() Pos + + // The methods below are used for left recursion handling. + NullableVisit(rules map[string]*Rule) bool + IsNullable() bool + InitialNames() map[string]struct{} } // ChoiceExpr is an ordered sequence of expressions. The parser tries to @@ -91,8 +143,12 @@ type Expression interface { type ChoiceExpr struct { p Pos Alternatives []Expression + + Nullable bool } +var _ Expression = (*ChoiceExpr)(nil) + // NewChoiceExpr creates a new choice expression at the specified position.
func NewChoiceExpr(p Pos) *ChoiceExpr { return &ChoiceExpr{p: p} @@ -113,7 +169,35 @@ func (c *ChoiceExpr) String() string { return buf.String() } -// FailureLabel is an identifier, which can by thrown and recovered in a grammar +// NullableVisit recursively determines whether an object is nullable. +func (c *ChoiceExpr) NullableVisit(rules map[string]*Rule) bool { + for _, alt := range c.Alternatives { + if alt.NullableVisit(rules) { + c.Nullable = true + return true + } + } + c.Nullable = false + return false +} + +// IsNullable returns the nullable attribute of the node. +func (c *ChoiceExpr) IsNullable() bool { + return c.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin. +func (c *ChoiceExpr) InitialNames() map[string]struct{} { + names := make(map[string]struct{}) + for _, alt := range c.Alternatives { + for name := range alt.InitialNames() { + names[name] = struct{}{} + } + } + return names +} + +// FailureLabel is an identifier, which can be thrown and recovered in a grammar. type FailureLabel string // RecoveryExpr is an ordered sequence of expressions. The parser tries to @@ -124,8 +208,12 @@ type RecoveryExpr struct { Expr Expression RecoverExpr Expression Labels []FailureLabel + + Nullable bool } +var _ Expression = (*RecoveryExpr)(nil) + // NewRecoveryExpr creates a choice expression at the specified position. func NewRecoveryExpr(p Pos) *RecoveryExpr { return &RecoveryExpr{p: p} @@ -147,6 +235,29 @@ func (r *RecoveryExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable. +func (r *RecoveryExpr) NullableVisit(rules map[string]*Rule) bool { + r.Nullable = r.Expr.NullableVisit(rules) || r.RecoverExpr.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node. +func (r *RecoveryExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin.
+func (r *RecoveryExpr) InitialNames() map[string]struct{} { + names := make(map[string]struct{}) + for name := range r.Expr.InitialNames() { + names[name] = struct{}{} + } + for name := range r.RecoverExpr.InitialNames() { + names[name] = struct{}{} + } + return names +} + // ActionExpr is an expression that has an associated block of code to // execute when the expression matches. type ActionExpr struct { @@ -154,8 +265,12 @@ type ActionExpr struct { Expr Expression Code *CodeBlock FuncIx int + + Nullable bool } +var _ Expression = (*ActionExpr)(nil) + // NewActionExpr creates a new action expression at the specified position. func NewActionExpr(p Pos) *ActionExpr { return &ActionExpr{p: p} @@ -169,6 +284,26 @@ func (a *ActionExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v, Code: %v}", a.p, a, a.Expr, a.Code) } +// NullableVisit recursively determines whether an object is nullable. +func (a *ActionExpr) NullableVisit(rules map[string]*Rule) bool { + a.Nullable = a.Expr.NullableVisit(rules) + return a.Nullable +} + +// IsNullable returns the nullable attribute of the node. +func (a *ActionExpr) IsNullable() bool { + return a.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin. +func (a *ActionExpr) InitialNames() map[string]struct{} { + names := make(map[string]struct{}) + for name := range a.Expr.InitialNames() { + names[name] = struct{}{} + } + return names +} + // ThrowExpr is an expression that throws an FailureLabel to be catched by a // RecoveryChoiceExpr. type ThrowExpr struct { @@ -176,6 +311,8 @@ type ThrowExpr struct { Label string } +var _ Expression = (*ThrowExpr)(nil) + // NewThrowExpr creates a new throw expression at the specified position. func NewThrowExpr(p Pos) *ThrowExpr { return &ThrowExpr{p: p} @@ -189,13 +326,32 @@ func (t *ThrowExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v}", t.p, t, t.Label) } +// NullableVisit recursively determines whether an object is nullable. 
+func (t *ThrowExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (t *ThrowExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (t *ThrowExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // SeqExpr is an ordered sequence of expressions, all of which must match // if the SeqExpr is to be a match itself. type SeqExpr struct { p Pos Exprs []Expression + + Nullable bool } +var _ Expression = (*SeqExpr)(nil) + // NewSeqExpr creates a new sequence expression at the specified position. func NewSeqExpr(p Pos) *SeqExpr { return &SeqExpr{p: p} @@ -216,6 +372,37 @@ func (s *SeqExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable. +func (s *SeqExpr) NullableVisit(rules map[string]*Rule) bool { + for _, item := range s.Exprs { + if !item.NullableVisit(rules) { + s.Nullable = false + return false + } + } + s.Nullable = true + return true +} + +// IsNullable returns the nullable attribute of the node. +func (s *SeqExpr) IsNullable() bool { + return s.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin. +func (s *SeqExpr) InitialNames() map[string]struct{} { + names := make(map[string]struct{}) + for _, item := range s.Exprs { + for name := range item.InitialNames() { + names[name] = struct{}{} + } + if !item.IsNullable() { + break + } + } + return names +} + // LabeledExpr is an expression that has an associated label. Code blocks // can access the value of the expression using that label, that becomes // a local variable in the code. @@ -225,6 +412,8 @@ type LabeledExpr struct { Expr Expression } +var _ Expression = (*LabeledExpr)(nil) + // NewLabeledExpr creates a new labeled expression at the specified position. 
func NewLabeledExpr(p Pos) *LabeledExpr { return &LabeledExpr{p: p} @@ -238,6 +427,21 @@ func (l *LabeledExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v, Expr: %v}", l.p, l, l.Label, l.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (l *LabeledExpr) NullableVisit(rules map[string]*Rule) bool { + return l.Expr.NullableVisit(rules) +} + +// IsNullable returns the nullable attribute of the node. +func (l *LabeledExpr) IsNullable() bool { + return l.Expr.IsNullable() +} + +// InitialNames returns names of nodes with which an expression can begin. +func (l *LabeledExpr) InitialNames() map[string]struct{} { + return l.Expr.InitialNames() +} + // AndExpr is a zero-length matcher that is considered a match if the // expression it contains is a match. type AndExpr struct { @@ -250,6 +454,8 @@ func NewAndExpr(p Pos) *AndExpr { return &AndExpr{p: p} } +var _ Expression = (*AndExpr)(nil) + // Pos returns the starting position of the node. func (a *AndExpr) Pos() Pos { return a.p } @@ -258,6 +464,21 @@ func (a *AndExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", a.p, a, a.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (a *AndExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (a *AndExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (a *AndExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // NotExpr is a zero-length matcher that is considered a match if the // expression it contains is not a match. type NotExpr struct { @@ -265,6 +486,8 @@ type NotExpr struct { Expr Expression } +var _ Expression = (*NotExpr)(nil) + // NewNotExpr creates a new not (!) expression at the specified position. 
func NewNotExpr(p Pos) *NotExpr { return &NotExpr{p: p} @@ -278,12 +501,29 @@ func (n *NotExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", n.p, n, n.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (n *NotExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (n *NotExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (n *NotExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // ZeroOrOneExpr is an expression that can be matched zero or one time. type ZeroOrOneExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrOneExpr)(nil) + // NewZeroOrOneExpr creates a new zero or one expression at the specified // position. func NewZeroOrOneExpr(p Pos) *ZeroOrOneExpr { @@ -298,12 +538,29 @@ func (z *ZeroOrOneExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (z *ZeroOrOneExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (z *ZeroOrOneExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (z *ZeroOrOneExpr) InitialNames() map[string]struct{} { + return z.Expr.InitialNames() +} + // ZeroOrMoreExpr is an expression that can be matched zero or more times. type ZeroOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrMoreExpr)(nil) + // NewZeroOrMoreExpr creates a new zero or more expression at the specified // position. func NewZeroOrMoreExpr(p Pos) *ZeroOrMoreExpr { @@ -318,12 +575,29 @@ func (z *ZeroOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable. 
+func (z *ZeroOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (z *ZeroOrMoreExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (z *ZeroOrMoreExpr) InitialNames() map[string]struct{} { + return z.Expr.InitialNames() +} + // OneOrMoreExpr is an expression that can be matched one or more times. type OneOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*OneOrMoreExpr)(nil) + // NewOneOrMoreExpr creates a new one or more expression at the specified // position. func NewOneOrMoreExpr(p Pos) *OneOrMoreExpr { @@ -338,12 +612,31 @@ func (o *OneOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", o.p, o, o.Expr) } +// NullableVisit recursively determines whether an object is nullable. +func (o *OneOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return false +} + +// IsNullable returns the nullable attribute of the node. +func (o *OneOrMoreExpr) IsNullable() bool { + return false +} + +// InitialNames returns names of nodes with which an expression can begin. +func (o *OneOrMoreExpr) InitialNames() map[string]struct{} { + return o.Expr.InitialNames() +} + // RuleRefExpr is an expression that references a rule by name. type RuleRefExpr struct { p Pos Name *Identifier + + Nullable bool } +var _ Expression = (*RuleRefExpr)(nil) + // NewRuleRefExpr creates a new rule reference expression at the specified // position. func NewRuleRefExpr(p Pos) *RuleRefExpr { @@ -358,6 +651,28 @@ func (r *RuleRefExpr) String() string { return fmt.Sprintf("%s: %T{Name: %v}", r.p, r, r.Name) } +// NullableVisit recursively determines whether an object is nullable. +func (r *RuleRefExpr) NullableVisit(rules map[string]*Rule) bool { + item, ok := rules[r.Name.Val] + if !ok { + // Token or unknown; never empty. 
+ r.Nullable = false + return false + } + r.Nullable = item.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node. +func (r *RuleRefExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin. +func (r *RuleRefExpr) InitialNames() map[string]struct{} { + return map[string]struct{}{r.Name.Val: {}} +} + // StateCodeExpr is an expression which can modify the internal state of the parser. type StateCodeExpr struct { p Pos @@ -365,6 +680,8 @@ type StateCodeExpr struct { FuncIx int } +var _ Expression = (*StateCodeExpr)(nil) + // NewStateCodeExpr creates a new state (#) code expression at the specified // position. func NewStateCodeExpr(p Pos) *StateCodeExpr { @@ -379,6 +696,21 @@ func (s *StateCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", s.p, s, s.Code) } +// NullableVisit recursively determines whether an object is nullable. +func (s *StateCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (s *StateCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (s *StateCodeExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // AndCodeExpr is a zero-length matcher that is considered a match if the // code block returns true. type AndCodeExpr struct { @@ -387,6 +719,8 @@ type AndCodeExpr struct { FuncIx int } +var _ Expression = (*AndCodeExpr)(nil) + // NewAndCodeExpr creates a new and (&) code expression at the specified // position. func NewAndCodeExpr(p Pos) *AndCodeExpr { @@ -401,6 +735,21 @@ func (a *AndCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", a.p, a, a.Code) } +// NullableVisit recursively determines whether an object is nullable. 
+func (a *AndCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (a *AndCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (a *AndCodeExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // NotCodeExpr is a zero-length matcher that is considered a match if the // code block returns false. type NotCodeExpr struct { @@ -409,6 +758,8 @@ type NotCodeExpr struct { FuncIx int } +var _ Expression = (*NotCodeExpr)(nil) + // NewNotCodeExpr creates a new not (!) code expression at the specified // position. func NewNotCodeExpr(p Pos) *NotCodeExpr { @@ -423,6 +774,21 @@ func (n *NotCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", n.p, n, n.Code) } +// NullableVisit recursively determines whether an object is nullable. +func (n *NotCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node. +func (n *NotCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin. +func (n *NotCodeExpr) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // LitMatcher is a string literal matcher. The value to match may be a // double-quoted string, a single-quoted single character, or a back-tick // quoted raw string. @@ -431,6 +797,8 @@ type LitMatcher struct { IgnoreCase bool } +var _ Expression = (*LitMatcher)(nil) + // NewLitMatcher creates a new literal matcher at the specified position and // with the specified value. func NewLitMatcher(p Pos, v string) *LitMatcher { @@ -445,6 +813,22 @@ func (l *LitMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q, IgnoreCase: %t}", l.p, l, l.Val, l.IgnoreCase) } +// NullableVisit recursively determines whether an object is nullable. 
+func (l *LitMatcher) NullableVisit(rules map[string]*Rule) bool { + return l.IsNullable() +} + +// IsNullable returns the nullable attribute of the node. +func (l *LitMatcher) IsNullable() bool { + // The string token '' is considered empty. + return len(l.Val) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin. +func (l *LitMatcher) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // CharClassMatcher is a character class matcher. The value to match must // be one of the specified characters, in a range of characters, or in the // Unicode classes of characters. @@ -457,6 +841,8 @@ type CharClassMatcher struct { UnicodeClasses []string } +var _ Expression = (*CharClassMatcher)(nil) + // NewCharClassMatcher creates a new character class matcher at the specified // position and with the specified raw value. It parses the raw value into // the list of characters, ranges and Unicode classes. @@ -580,11 +966,28 @@ func (c *CharClassMatcher) String() string { c.p, c, c.Val, c.IgnoreCase, c.Inverted) } +// NullableVisit recursively determines whether an object is nullable. +func (c *CharClassMatcher) NullableVisit(rules map[string]*Rule) bool { + return c.IsNullable() +} + +// IsNullable returns the nullable attribute of the node. +func (c *CharClassMatcher) IsNullable() bool { + return len(c.Chars) == 0 && len(c.Ranges) == 0 && len(c.UnicodeClasses) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin. +func (c *CharClassMatcher) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // AnyMatcher is a matcher that matches any character except end-of-file. type AnyMatcher struct { posValue } +var _ Expression = (*AnyMatcher)(nil) + // NewAnyMatcher creates a new any matcher at the specified position. The // value is provided for completeness' sake, but it is always the dot. 
func NewAnyMatcher(p Pos, v string) *AnyMatcher { @@ -599,11 +1002,28 @@ func (a *AnyMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q}", a.p, a, a.Val) } +// NullableVisit recursively determines whether an object is nullable. +func (a *AnyMatcher) NullableVisit(rules map[string]*Rule) bool { + return false +} + +// IsNullable returns the nullable attribute of the node. +func (a *AnyMatcher) IsNullable() bool { + return false +} + +// InitialNames returns names of nodes with which an expression can begin. +func (a *AnyMatcher) InitialNames() map[string]struct{} { + return make(map[string]struct{}) +} + // CodeBlock represents a code block. type CodeBlock struct { posValue } +var _ Expression = (*CodeBlock)(nil) + // NewCodeBlock creates a new code block at the specified position and with // the specified value. The value includes the outer braces. func NewCodeBlock(p Pos, code string) *CodeBlock { @@ -618,11 +1038,28 @@ func (c *CodeBlock) String() string { return fmt.Sprintf("%s: %T{Val: %q}", c.p, c, c.Val) } +// NullableVisit recursively determines whether an object is nullable. +func (c *CodeBlock) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the CodeBlock") +} + +// IsNullable returns the nullable attribute of the node. +func (c *CodeBlock) IsNullable() bool { + panic("IsNullable should not be called on the CodeBlock") +} + +// InitialNames returns names of nodes with which an expression can begin. +func (c *CodeBlock) InitialNames() map[string]struct{} { + panic("InitialNames should not be called on the CodeBlock") +} + // Identifier represents an identifier. type Identifier struct { posValue } +var _ Expression = (*Identifier)(nil) + // NewIdentifier creates a new identifier at the specified position and // with the specified name. 
func NewIdentifier(p Pos, name string) *Identifier { @@ -637,11 +1074,28 @@ func (i *Identifier) String() string { return fmt.Sprintf("%s: %T{Val: %q}", i.p, i, i.Val) } +// NullableVisit recursively determines whether an object is nullable. +func (i *Identifier) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Identifier") +} + +// IsNullable returns the nullable attribute of the node. +func (i *Identifier) IsNullable() bool { + panic("IsNullable should not be called on the Identifier") +} + +// InitialNames returns names of nodes with which an expression can begin. +func (i *Identifier) InitialNames() map[string]struct{} { + panic("InitialNames should not be called on the Identifier") +} + // StringLit represents a string literal. type StringLit struct { posValue } +var _ Expression = (*StringLit)(nil) + // NewStringLit creates a new string literal at the specified position and // with the specified value. func NewStringLit(p Pos, val string) *StringLit { @@ -656,6 +1110,21 @@ func (s *StringLit) String() string { return fmt.Sprintf("%s: %T{Val: %q}", s.p, s, s.Val) } +// NullableVisit recursively determines whether an object is nullable. +func (s *StringLit) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the StringLit") +} + +// IsNullable returns the nullable attribute of the node. +func (s *StringLit) IsNullable() bool { + panic("IsNullable should not be called on the StringLit") +} + +// InitialNames returns names of nodes with which an expression can begin. 
+func (s *StringLit) InitialNames() map[string]struct{} { + panic("InitialNames should not be called on the StringLit") +} + type posValue struct { p Pos Val string diff --git a/ast/ast_optimize.go b/ast/ast_optimize.go index 4687c8b..0949adb 100644 --- a/ast/ast_optimize.go +++ b/ast/ast_optimize.go @@ -34,7 +34,7 @@ func newGrammarOptimizer(protectedRules []string) *grammarOptimizer { // Visit is a generic Visitor to be used with Walk // The actual function, which should be used during Walk -// is held in ruleRefOptimizer.visitor +// is held in ruleRefOptimizer.visitor. func (r *grammarOptimizer) Visit(expr Expression) Visitor { return r.visitor(expr) } @@ -265,7 +265,7 @@ func (r *grammarOptimizer) optimizeRule(expr Expression) Expression { // cloneExpr takes an Expression and deep clones it (including all children) // This is necessary because referenced Rules are denormalized and therefore -// have to become independent from their original Expression +// have to become independent from their original Expression. func cloneExpr(expr Expression) Expression { switch expr := expr.(type) { case *ActionExpr: @@ -359,7 +359,7 @@ func cloneExpr(expr Expression) Expression { // The purpose of this function is to cleanup the redundancies created by the // optimize Visitor. This includes to remove redundant entries in Chars, Ranges // and UnicodeClasses of the given CharClassMatcher as well as regenerating the -// correct content for the Val field (string representation of the CharClassMatcher) +// correct content for the Val field (string representation of the CharClassMatcher). 
func (r *grammarOptimizer) cleanupCharClassMatcher(expr0 Expression) Visitor { // We are only interested in nodes of type *CharClassMatcher if chr, ok := expr0.(*CharClassMatcher); ok { diff --git a/bootstrap/cmd/bootstrap-pigeon/bootstrap_pigeon.go b/bootstrap/cmd/bootstrap-pigeon/bootstrap_pigeon.go index fdfa0d7..55b660d 100644 --- a/bootstrap/cmd/bootstrap-pigeon/bootstrap_pigeon.go +++ b/bootstrap/cmd/bootstrap-pigeon/bootstrap_pigeon.go @@ -2800,14 +2800,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -3008,7 +3013,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -3049,36 +3054,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = 
p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -3090,6 +3111,15 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -3137,9 +3167,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -3149,7 +3176,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -3163,7 +3190,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -3192,7 +3219,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := 
p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -3309,7 +3336,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -3327,7 +3354,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -3383,7 +3410,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -3401,7 +3428,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -3420,7 +3447,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -3440,7 +3467,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -3453,7 +3480,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -3483,7 +3510,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) 
- 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -3501,7 +3528,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -3516,7 +3543,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/builder/builder.go b/builder/builder.go index ee76666..7767f55 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -77,6 +77,16 @@ func Optimize(optimize bool) Option { } } +// SupportLeftRecursion returns an option that specifies the supportLeftRecursion option. +// If supportLeftRecursion is true, LeftRecursion code is added to the resulting parser. +func SupportLeftRecursion(support bool) Option { + return func(b *builder) Option { + prev := b.supportLeftRecursion + b.supportLeftRecursion = support + return SupportLeftRecursion(prev) + } +} + // Nolint returns an option that specifies the nolint option // If nolint is true, special '// nolint: ...' comments are added // to the generated parser to suppress warnings by gometalinter. 
@@ -118,6 +128,8 @@ type builder struct { basicLatinLookupTable bool globalState bool nolint bool + supportLeftRecursion bool + haveLeftRecursion bool ruleName string exprIndex int @@ -132,11 +144,19 @@ func (b *builder) setOptions(opts []Option) { } } -func (b *builder) buildParser(g *ast.Grammar) error { - b.writeInit(g.Init) - b.writeGrammar(g) +func (b *builder) buildParser(grammar *ast.Grammar) error { + haveLeftRecursion, err := PrepareGrammar(grammar) + if err != nil { + return fmt.Errorf("incorrect grammar: %w", err) + } + if !b.supportLeftRecursion && haveLeftRecursion { + return fmt.Errorf("incorrect grammar: %w", ErrHaveLeftRecursion) + } + b.haveLeftRecursion = haveLeftRecursion - for _, rule := range g.Rules { + b.writeInit(grammar.Init) + b.writeGrammar(grammar) + for _, rule := range grammar.Rules { b.writeRuleCode(rule) } b.writeStaticCode() @@ -183,6 +203,10 @@ func (b *builder) writeRule(r *ast.Rule) { b.writelnf("\tpos: position{line: %d, col: %d, offset: %d},", pos.Line, pos.Col, pos.Off) b.writef("\texpr: ") b.writeExpr(r.Expr) + if b.haveLeftRecursion { + b.writelnf("\tleader: %t,", r.Leader) + b.writelnf("\tleftRecursive: %t,", r.LeftRecursive) + } b.writelnf("},") } @@ -758,11 +782,13 @@ func (b *builder) writeStaticCode() { Optimize bool BasicLatinLookupTable bool GlobalState bool + LeftRecursion bool Nolint bool }{ Optimize: b.optimize, BasicLatinLookupTable: b.basicLatinLookupTable, GlobalState: b.globalState, + LeftRecursion: b.haveLeftRecursion, Nolint: b.nolint, } t := template.Must(template.New("static_code").Parse(staticCode)) diff --git a/builder/generated_static_code.go b/builder/generated_static_code.go index f408ef6..ede6831 100644 --- a/builder/generated_static_code.go +++ b/builder/generated_static_code.go @@ -251,6 +251,11 @@ type rule struct { name string displayName string expr any + + // ==template== {{ if .LeftRecursion }} + leader bool + leftRecursive bool + // {{ end }} ==template== } // {{ if .Nolint }} nolint: 
structcheck {{else}} ==template== {{ end }} @@ -489,6 +494,14 @@ type Stats struct { ChoiceAltCnt map[string]map[string]int } +// ==template== {{ if .LeftRecursion }} +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// {{ end }} ==template== + // {{ if .Nolint }} nolint: structcheck,maligned {{else}} ==template== {{ end }} type parser struct { filename string @@ -504,6 +517,8 @@ type parser struct { debug bool memoize bool + // {{ end }} ==template== + // ==template== {{ if or .LeftRecursion (not .Optimize) }} // memoization table for the packrat algorithm: // map[offset in source] map[expression or rule] {value, match} memo map[int]map[any]resultTuple @@ -603,14 +618,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } // {{ end }} ==template== @@ -756,7 +776,7 @@ func (p *parser) sliceFrom(start savepoint) []byte { return p.data[start.position.offset:p.pt.position.offset] } -// ==template== {{ if not .Optimize }} +// ==template== {{ if or .LeftRecursion (not .Optimize) }} func (p *parser) getMemoized(node any) (resultTuple, bool) { if len(p.memo) == 0 { return resultTuple{}, false @@ -829,7 +849,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -870,45 +890,156 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +// ==template== 
{{ if .LeftRecursion }} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + // ==template== {{ if not .Optimize }} if p.debug { - defer p.out(p.in("parseRule " + rule.name)) + defer p.out(p.in("recursive " + rule.name)) + } + // {{ end }} ==template== + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + // ==template== {{ if or .GlobalState (not .Optimize) }} + lastState := p.cloneState() + // {{ end }} ==template== + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + endMark := p.pt + // ==template== {{ if not .Optimize }} + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + // {{ end }} ==template== + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + // ==template== {{ if or .GlobalState (not .Optimize) }} + p.restoreState(lastState) + // {{ end }} ==template== + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ } + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +// {{ end }} ==template== + +// ==template== {{ if not .Optimize }} +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +// {{ end }} ==template== + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + // ==template== {{ if not .Optimize }} + if p.debug { + defer 
p.out(p.in("parseRule " + rule.name)) + } + // {{ end }} ==template== + var ( + val any + ok bool + // ==template== {{ if not .Optimize }} + startMark = p.pt + // {{ end }} ==template== + ) + + // ==template== {{ if and .LeftRecursion (not .Optimize) }} + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + // {{ else if not .Optimize }} if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) + } + // {{ else if .LeftRecursion }} + if rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) } + } else { + val, ok = p.parseRule(rule) } + // {{ else }} + val, ok = p.parseRule(rule) + // {{ end }} ==template== - start := p.pt + // ==template== {{ if not .Optimize }} + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } // {{ end }} ==template== + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - // ==template== {{ if not .Optimize }} - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } - // {{ end }} ==template== return val, ok } -// {{ if .Nolint }} nolint: gocyclo {{else}} ==template== {{ end }} -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { // ==template== {{ if not .Optimize }} var pt savepoint + // ==template== {{ if 
.LeftRecursion }} + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + // {{ else }} if p.memoize { + // {{ end }} ==template== res, ok := p.getMemoized(expr) if ok { p.restore(res.end) @@ -918,7 +1049,22 @@ func (p *parser) parseExpr(expr any) (any, bool) { } // {{ end }} ==template== + val, ok := p.parseExpr(expr) + // ==template== {{ if not .Optimize }} + // ==template== {{ if .LeftRecursion }} + if p.memoize && !isLeftRecusion { + // {{ else }} + if p.memoize { + // {{ end }} ==template== + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + // {{ end }} ==template== + return val, ok +} + +// {{ if .Nolint }} nolint: gocyclo {{else}} ==template== {{ end }} +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -968,11 +1114,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - // ==template== {{ if not .Optimize }} - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } - // {{ end }} ==template== return val, ok } @@ -984,7 +1125,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { // {{ end }} ==template== start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1003,7 +1144,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } // ==template== {{ if not .Optimize }} if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } // {{ end }} ==template== return val, ok @@ -1043,7 +1184,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { state := p.cloneState() // {{ end }} ==template== p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() // ==template== {{ if or .GlobalState (not .Optimize) }} 
p.restoreState(state) @@ -1176,8 +1317,8 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { if p.debug { defer p.out(p.in("parseChoiceExpr")) } - // {{ end }} ==template== + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI @@ -1187,7 +1328,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { // {{ end }} ==template== p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { // ==template== {{ if not .Optimize }} @@ -1213,7 +1354,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { // {{ end }} ==template== p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1281,7 +1422,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { // {{ end }} ==template== p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() // ==template== {{ if or .GlobalState (not .Optimize) }} @@ -1303,7 +1444,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1325,7 +1466,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { // {{ end }} ==template== p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1347,7 +1488,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1364,7 +1505,7 @@ func (p *parser) 
parseSeqExpr(seq *seqExpr) (any, bool) { state := p.cloneState() // {{ end }} ==template== for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { // ==template== {{ if or .GlobalState (not .Optimize) }} p.restoreState(state) @@ -1405,7 +1546,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1425,7 +1566,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1442,7 +1583,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { // {{ end }} ==template== p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/builder/left_recursion.go b/builder/left_recursion.go new file mode 100644 index 0000000..d669d4a --- /dev/null +++ b/builder/left_recursion.go @@ -0,0 +1,132 @@ +package builder + +import ( + "errors" + "fmt" + + "github.com/mna/pigeon/ast" +) + +var ( + // ErrNoLeader is no leader error. + ErrNoLeader = errors.New( + "SCC has no leadership candidate (no element is included in all cycles)") + // ErrHaveLeftRecursion is recursion error. + ErrHaveLeftRecursion = errors.New("grammar contains left recursion") +) + +// PrepareGrammar evaluates parameters associated with left recursion. 
+func PrepareGrammar(grammar *ast.Grammar) (bool, error) { + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + ComputeNullables(mapRules) + haveLeftRecursion, err := ComputeLeftRecursives(mapRules) + if err != nil { + return false, fmt.Errorf("error compute left recursive: %w", err) + } + return haveLeftRecursion, nil +} + +// ComputeNullables evaluates nullable nodes. +func ComputeNullables(rules map[string]*ast.Rule) { + // Compute which rules in a grammar are nullable + for _, rule := range rules { + rule.NullableVisit(rules) + } +} + +func findLeader( + graph map[string]map[string]struct{}, scc map[string]struct{}, +) (string, error) { + // Try to find a leader such that all cycles go through it. + leaders := make(map[string]struct{}, len(scc)) + for k := range scc { + leaders[k] = struct{}{} + } + for start := range scc { + cycles, err := FindCyclesInSCC(graph, scc, start) + if err != nil { + return "", fmt.Errorf("error find cycles: %w", err) + } + for _, cycle := range cycles { + mapCycle := make(map[string]struct{}, len(cycle)) + for _, k := range cycle { + mapCycle[k] = struct{}{} + } + for k := range scc { + if _, okCycle := mapCycle[k]; !okCycle { + delete(leaders, k) + } + } + if len(leaders) == 0 { + return "", ErrNoLeader + } + } + } + // Pick an arbitrary leader from the candidates. + var leader string + for k := range leaders { + leader = k // The only element. + break + } + return leader, nil +} + +// ComputeLeftRecursives evaluates left recursion. 
+func ComputeLeftRecursives(rules map[string]*ast.Rule) (bool, error) { + graph := MakeFirstGraph(rules) + vertices := make([]string, 0, len(graph)) + haveLeftRecursion := false + for k := range graph { + vertices = append(vertices, k) + } + sccs := StronglyConnectedComponents(vertices, graph) + for _, scc := range sccs { + if len(scc) > 1 { + for name := range scc { + rules[name].LeftRecursive = true + haveLeftRecursion = true + } + leader, err := findLeader(graph, scc) + if err != nil { + return false, fmt.Errorf("error find leader %v: %w", scc, err) + } + rules[leader].Leader = true + } else { + var name string + for k := range scc { + name = k // The only element. + break + } + if _, ok := graph[name][name]; ok { + rules[name].LeftRecursive = true + rules[name].Leader = true + haveLeftRecursion = true + } + } + } + return haveLeftRecursion, nil +} + +// MakeFirstGraph compute the graph of left-invocations. +// There's an edge from A to B if A may invoke B at its initial position. +// Note that this requires the nullable flags to have been computed. 
+func MakeFirstGraph(rules map[string]*ast.Rule) map[string]map[string]struct{} { + graph := make(map[string]map[string]struct{}) + vertices := make(map[string]struct{}) + for rulename, rule := range rules { + names := rule.InitialNames() + graph[rulename] = names + for name := range names { + vertices[name] = struct{}{} + } + } + for vertex := range vertices { + if _, ok := graph[vertex]; !ok { + graph[vertex] = make(map[string]struct{}) + } + } + return graph +} diff --git a/builder/left_recursion_test.go b/builder/left_recursion_test.go new file mode 100644 index 0000000..96a5b09 --- /dev/null +++ b/builder/left_recursion_test.go @@ -0,0 +1,217 @@ +package builder_test + +import ( + "errors" + "strings" + "testing" + + "github.com/mna/pigeon/ast" + "github.com/mna/pigeon/bootstrap" + "github.com/mna/pigeon/builder" +) + +func TestLeftRecursive(t *testing.T) { + t.Parallel() + + text := ` + start = expr NEWLINE + expr = ('-' term / expr '+' term / term) + term = NUMBER + foo = NAME+ + bar = NAME* + baz = NAME? 
+ ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + haveLeftRecursion, err := builder.PrepareGrammar(grammar) + if err != nil { + t.Fatal(err) + } + if !haveLeftRecursion { + t.Fatalf("Recursion not found") + } + + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + if mapRules["start"].LeftRecursive { + t.Error("Rule 'start' does not contain left recursion") + } + if !mapRules["expr"].LeftRecursive { + t.Error("Rule 'expr' contains left recursion") + } + if mapRules["term"].LeftRecursive { + t.Error("Rule 'term' does not contain left recursion") + } + if mapRules["foo"].LeftRecursive { + t.Error("Rule 'foo' does not contain left recursion") + } + if mapRules["bar"].LeftRecursive { + t.Error("Rule 'bar' does not contain left recursion") + } + if mapRules["baz"].LeftRecursive { + t.Error("Rule 'baz' does not contain left recursion") + } +} + +func TestNullable(t *testing.T) { + t.Parallel() + + text := ` + start = sign NUMBER + sign = ('-' / '+')? + ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + haveLeftRecursion, err := builder.PrepareGrammar(grammar) + if err != nil { + t.Fatal(err) + } + if haveLeftRecursion { + t.Fatalf("Recursion found") + } + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + if mapRules["start"].Nullable { + t.Error("Rule 'start' is not nullable") + } + if !mapRules["sign"].Nullable { + t.Error("Rule 'sign' is nullable") + } +} + +func TestAdvancedLeftRecursive(t *testing.T) { + t.Parallel() + + text := ` + start = NUMBER / sign start + sign = '-'? 
+ ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + haveLeftRecursion, err := builder.PrepareGrammar(grammar) + if err != nil { + t.Fatal(err) + } + if !haveLeftRecursion { + t.Fatalf("Recursion not found") + } + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + if mapRules["start"].Nullable { + t.Error("Rule 'start' is not Nullable") + } + if !mapRules["sign"].Nullable { + t.Error("Rule 'sign' is Nullable") + } + if !mapRules["start"].LeftRecursive { + t.Error("Rule 'start' does not contain left recursion") + } + if mapRules["sign"].LeftRecursive { + t.Error("Rule 'sign' contains left recursion") + } +} + +func TestMutuallyLeftRecursive(t *testing.T) { + t.Parallel() + + text := ` + start = foo 'E' + foo = bar 'A' / 'B' + bar = foo 'C' / 'D' + ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + haveLeftRecursion, err := builder.PrepareGrammar(grammar) + if err != nil { + t.Fatal(err) + } + if !haveLeftRecursion { + t.Fatalf("Recursion not found") + } + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + if mapRules["start"].LeftRecursive { + t.Error("Rule 'start' does not contain left recursion") + } + if !mapRules["foo"].LeftRecursive { + t.Error("Rule 'foo' contains left recursion") + } + if !mapRules["bar"].LeftRecursive { + t.Error("Rule 'bar' contains left recursion") + } +} + +func TestNastyMutuallyLeftRecursive(t *testing.T) { + t.Parallel() + + text := ` + start = target '=' + target = maybe '+' / NAME + maybe = maybe '-' / target + ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + haveLeftRecursion, err := builder.PrepareGrammar(grammar) + if err != nil { + 
t.Fatal(err) + } + if !haveLeftRecursion { + t.Fatalf("Recursion not found") + } + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + if mapRules["start"].LeftRecursive { + t.Error("Rule 'start' does not contain left recursion") + } + if !mapRules["target"].LeftRecursive { + t.Error("Rule 'target' contains left recursion") + } + if !mapRules["maybe"].LeftRecursive { + t.Error("Rule 'maybe' contains left recursion") + } +} + +func TestLeftRecursionTooComplex(t *testing.T) { + t.Parallel() + + text := ` + start = foo + foo = bar '+' / baz '+' / '+' + bar = baz '-' / foo '-' / '-' + baz = foo '*' / bar '*' / '*' + ` + p := bootstrap.NewParser() + grammar, err := p.Parse("", strings.NewReader(text)) + if err != nil { + t.Fatal(err) + } + _, err = builder.PrepareGrammar(grammar) + if !errors.Is(err, builder.ErrNoLeader) { + t.Fatalf("Got %s, but expected %s", err, builder.ErrNoLeader) + } +} diff --git a/builder/scc.go b/builder/scc.go new file mode 100644 index 0000000..6e8381c --- /dev/null +++ b/builder/scc.go @@ -0,0 +1,151 @@ +package builder + +import ( + "errors" + "fmt" +) + +// ErrInvalidParameters is parameters error. +var ErrInvalidParameters = errors.New("invalid parameters passed to function") + +func min(a1 int, a2 int) int { + if a1 <= a2 { + return a1 + } + return a2 +} + +// StronglyConnectedComponents compute strongly сonnected сomponents of a graph. +// Tarjan's strongly connected components algorithm. 
+func StronglyConnectedComponents( + vertices []string, edges map[string]map[string]struct{}, +) []map[string]struct{} { + // Tarjan's strongly connected components algorithm + var ( + identified = map[string]struct{}{} + stack = []string{} + index = map[string]int{} + lowlink = map[string]int{} + dfs func(v string) []map[string]struct{} + ) + + dfs = func(vertex string) []map[string]struct{} { + index[vertex] = len(stack) + stack = append(stack, vertex) + lowlink[vertex] = index[vertex] + + sccs := []map[string]struct{}{} + for w := range edges[vertex] { + if _, ok := index[w]; !ok { + sccs = append(sccs, dfs(w)...) + lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } else if _, ok := identified[w]; !ok { + lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } + } + + if lowlink[vertex] == index[vertex] { + scc := map[string]struct{}{} + for _, v := range stack[index[vertex]:] { + scc[v] = struct{}{} + } + stack = stack[:index[vertex]] + for v := range scc { + identified[v] = struct{}{} + } + sccs = append(sccs, scc) + } + return sccs + } + + sccs := []map[string]struct{}{} + for _, v := range vertices { + if _, ok := index[v]; !ok { + sccs = append(sccs, dfs(v)...) + } + } + return sccs +} + +func contains(s []string, e string) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + +func reduceGraph( + graph map[string]map[string]struct{}, scc map[string]struct{}, +) map[string]map[string]struct{} { + reduceGraph := map[string]map[string]struct{}{} + for src, dsts := range graph { + if _, ok := scc[src]; !ok { + continue + } + reduceGraph[src] = map[string]struct{}{} + for dst := range dsts { + if _, ok := scc[dst]; !ok { + continue + } + reduceGraph[src][dst] = struct{}{} + } + } + return reduceGraph +} + +// FindCyclesInSCC find cycles in SCC emanating from start. +// Yields lists of the form ['A', 'B', 'C', 'A'], which means there's +// a path from A -> B -> C -> A. 
The first item is always the start +// argument, but the last item may be another element, e.g. ['A', +// 'B', 'C', 'B'] means there's a path from A to B and there's a +// cycle from B to C and back. +func FindCyclesInSCC( + graph map[string]map[string]struct{}, scc map[string]struct{}, start string, +) ([][]string, error) { + // Basic input checks. + if _, ok := scc[start]; !ok { + return nil, fmt.Errorf( + "%w: scc %v does not contain %q", ErrInvalidParameters, scc, start) + } + extravertices := []string{} + for k := range scc { + if _, ok := graph[k]; !ok { + extravertices = append(extravertices, k) + } + } + if len(extravertices) != 0 { + return nil, fmt.Errorf( + "%w: graph does not contain scc. %v", + ErrInvalidParameters, extravertices) + } + + // Reduce the graph to nodes in the SCC. + graph = reduceGraph(graph, scc) + if _, ok := graph[start]; !ok { + return nil, fmt.Errorf( + "%w: graph %v does not contain %q", + ErrInvalidParameters, graph, start) + } + + // Recursive helper that yields cycles. + var dfs func(node string, path []string) [][]string + dfs = func(node string, path []string) [][]string { + ret := [][]string{} + if contains(path, node) { + t := make([]string, 0, len(path)+1) + t = append(t, path...) + t = append(t, node) + ret = append(ret, t) + return ret + } + path = append(path, node) // TODO: Make this not quadratic. + for child := range graph[node] { + ret = append(ret, dfs(child, path)...) 
+ } + return ret + } + + return dfs(start, []string{}), nil +} diff --git a/builder/scc_test.go b/builder/scc_test.go new file mode 100644 index 0000000..1b7d466 --- /dev/null +++ b/builder/scc_test.go @@ -0,0 +1,222 @@ +package builder_test + +import ( + "testing" + + "github.com/mna/pigeon/builder" + "github.com/mna/pigeon/testutils" +) + +func TestStronglyConnectedComponents(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + sccs []map[string]struct{} + } + + tests := []struct { + name string + graph map[string]map[string]struct{} + want want + }{ + { + name: "Simple", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + "2": {"1": {}}, + }, + want: want{sccs: []map[string]struct{}{ + {"2": {}, "1": {}}, + }}, + }, + { + name: "Without scc", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + }, + want: want{sccs: []map[string]struct{}{ + {"2": {}}, + {"1": {}}, + }}, + }, + { + name: "One element", + graph: map[string]map[string]struct{}{ + "1": {}, + }, + want: want{sccs: []map[string]struct{}{ + {"1": {}}, + }}, + }, + { + name: "One element with loop", + graph: map[string]map[string]struct{}{ + "1": {"1": {}}, + }, + want: want{sccs: []map[string]struct{}{ + {"1": {}}, + }}, + }, + { + name: "Wiki 1", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + "2": {"3": {}}, + "3": {"1": {}}, + "4": {"2": {}, "3": {}, "6": {}}, + "5": {"3": {}, "7": {}}, + "6": {"4": {}, "5": {}}, + "7": {"5": {}}, + "8": {"6": {}, "7": {}, "8": {}}, + }, + want: want{sccs: []map[string]struct{}{ + {"2": {}, "3": {}, "1": {}}, + {"5": {}, "7": {}}, + {"4": {}, "6": {}}, + {"8": {}}, + }}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]struct{}{ + "1": {"2": {}, "6": {}}, + "2": {"6": {}, "4": {}}, + "3": {"9": {}, "4": {}, "8": {}}, + "4": {"1": {}, "7": {}}, + "5": {"9": {}, "8": {}}, + "6": {"1": {}, "4": {}, "7": {}}, + "7": {"1": {}}, + "8": {"5": {}, "3": {}}, + "9": {"8": {}}, + }, + want: want{sccs: 
[]map[string]struct{}{ + {"1": {}, "2": {}, "4": {}, "6": {}, "7": {}}, + {"3": {}, "5": {}, "9": {}, "8": {}}, + }}, + }, + } + + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + vertices := make([]string, 0, len(testCase.graph)) + for k := range testCase.graph { + vertices = append(vertices, k) + } + sccs := builder.StronglyConnectedComponents(vertices, testCase.graph) + if !testutils.ElementsMatch(sccs, testCase.want.sccs) { + t.Fatalf("Result %v, expected %v", sccs, testCase.want.sccs) + } + }) + } +} + +func TestFindCyclesInSCC(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + paths [][]string + } + + tests := []struct { + name string + graph map[string]map[string]struct{} + scc map[string]struct{} + start string + want want + }{ + { + name: "Wiki 1 1", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + "2": {"3": {}}, + "3": {"1": {}}, + "4": {"2": {}, "3": {}, "6": {}}, + "5": {"3": {}, "7": {}}, + "6": {"4": {}, "5": {}}, + "7": {"5": {}}, + "8": {"6": {}, "7": {}, "8": {}}, + }, + scc: map[string]struct{}{"2": {}, "3": {}, "1": {}}, + start: "3", + want: want{paths: [][]string{{"3", "1", "2", "3"}}}, + }, + { + name: "Wiki 1 2", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + "2": {"3": {}}, + "3": {"1": {}}, + "4": {"2": {}, "3": {}, "6": {}}, + "5": {"3": {}, "7": {}}, + "6": {"4": {}, "5": {}}, + "7": {"5": {}}, + "8": {"6": {}, "7": {}, "8": {}}, + }, + scc: map[string]struct{}{"5": {}, "7": {}}, + start: "5", + want: want{paths: [][]string{{"5", "7", "5"}}}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]struct{}{ + "1": {"2": {}, "6": {}}, + "2": {"6": {}, "4": {}}, + "3": {"9": {}, "4": {}, "8": {}}, + "4": {"1": {}, "7": {}}, + "5": {"9": {}, "8": {}}, + "6": {"1": {}, "4": {}, "7": {}}, + "7": {"1": {}}, + "8": {"5": {}, "3": {}}, + "9": {"8": {}}, + }, + scc: map[string]struct{}{ + "1": {}, "2": {}, "4": {}, "6": {}, "7": 
{}, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "6", "1"}, + {"1", "2", "6", "4", "1"}, + {"1", "2", "6", "4", "7", "1"}, + {"1", "2", "6", "7", "1"}, + {"1", "2", "4", "1"}, + {"1", "2", "4", "7", "1"}, + {"1", "6", "1"}, + {"1", "6", "7", "1"}, + {"1", "6", "4", "7", "1"}, + {"1", "6", "4", "1"}, + }}, + }, + { + name: "loop in loop", + graph: map[string]map[string]struct{}{ + "1": {"2": {}}, + "2": {"3": {}}, + "3": {"1": {}, "2": {}}, + }, + scc: map[string]struct{}{ + "1": {}, "2": {}, "3": {}, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "3", "1"}, + {"1", "2", "3", "2"}, + }}, + }, + } + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + paths, err := builder.FindCyclesInSCC( + testCase.graph, testCase.scc, testCase.start) + if err != nil { + t.FailNow() + } + if !testutils.ElementsMatch(paths, testCase.want.paths) { + t.Fatalf("Result %v, expected %v", paths, testCase.want.paths) + } + }) + } +} diff --git a/builder/static_code.go b/builder/static_code.go index 3628ae1..a1a884c 100644 --- a/builder/static_code.go +++ b/builder/static_code.go @@ -269,6 +269,11 @@ type rule struct { name string displayName string expr any + + // ==template== {{ if .LeftRecursion }} + leader bool + leftRecursive bool + // {{ end }} ==template== } // {{ if .Nolint }} nolint: structcheck {{else}} ==template== {{ end }} @@ -507,6 +512,14 @@ type Stats struct { ChoiceAltCnt map[string]map[string]int } +// ==template== {{ if .LeftRecursion }} +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// {{ end }} ==template== + // {{ if .Nolint }} nolint: structcheck,maligned {{else}} ==template== {{ end }} type parser struct { filename string @@ -522,6 +535,8 @@ type parser struct { debug bool memoize bool + // {{ end }} ==template== + // ==template== {{ if or .LeftRecursion (not .Optimize) }} // memoization table for the packrat algorithm: // map[offset in source] 
map[expression or rule] {value, match} memo map[int]map[any]resultTuple @@ -621,14 +636,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } // {{ end }} ==template== @@ -774,7 +794,7 @@ func (p *parser) sliceFrom(start savepoint) []byte { return p.data[start.position.offset:p.pt.position.offset] } -// ==template== {{ if not .Optimize }} +// ==template== {{ if or .LeftRecursion (not .Optimize) }} func (p *parser) getMemoized(node any) (resultTuple, bool) { if len(p.memo) == 0 { return resultTuple{}, false @@ -847,7 +867,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -888,45 +908,156 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +// ==template== {{ if .LeftRecursion }} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + // ==template== {{ if not .Optimize }} if p.debug { - defer p.out(p.in("parseRule " + rule.name)) + defer p.out(p.in("recursive " + rule.name)) + } + // {{ end }} ==template== + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + // ==template== {{ if or .GlobalState (not .Optimize) }} + lastState := p.cloneState() + // {{ end }} 
==template== + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + endMark := p.pt + // ==template== {{ if not .Optimize }} + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + // {{ end }} ==template== + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + // ==template== {{ if or .GlobalState (not .Optimize) }} + p.restoreState(lastState) + // {{ end }} ==template== + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ } + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +// {{ end }} ==template== + +// ==template== {{ if not .Optimize }} +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +// {{ end }} ==template== + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + // ==template== {{ if not .Optimize }} + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + // {{ end }} ==template== + var ( + val any + ok bool + // ==template== {{ if not .Optimize }} + startMark = p.pt + // {{ end }} ==template== + ) + + // ==template== {{ if and .LeftRecursion (not .Optimize) }} + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + // {{ else if not .Optimize }} if p.memoize { - res, ok := 
p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) + } + // {{ else if .LeftRecursion }} + if rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) } + } else { + val, ok = p.parseRule(rule) } + // {{ else }} + val, ok = p.parseRule(rule) + // {{ end }} ==template== - start := p.pt + // ==template== {{ if not .Optimize }} + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } // {{ end }} ==template== + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - // ==template== {{ if not .Optimize }} - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } - // {{ end }} ==template== return val, ok } -// {{ if .Nolint }} nolint: gocyclo {{else}} ==template== {{ end }} -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { // ==template== {{ if not .Optimize }} var pt savepoint + // ==template== {{ if .LeftRecursion }} + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + // {{ else }} if p.memoize { + // {{ end }} ==template== res, ok := p.getMemoized(expr) if ok { p.restore(res.end) @@ -936,7 +1067,22 @@ func (p *parser) parseExpr(expr any) (any, bool) { } // {{ end }} ==template== + val, ok := p.parseExpr(expr) + // ==template== {{ if not .Optimize }} + // ==template== {{ if .LeftRecursion }} + if p.memoize && !isLeftRecusion { + // {{ else }} + if p.memoize { + // {{ end }} ==template== + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + // {{ 
end }} ==template== + return val, ok +} + +// {{ if .Nolint }} nolint: gocyclo {{else}} ==template== {{ end }} +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -986,11 +1132,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - // ==template== {{ if not .Optimize }} - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } - // {{ end }} ==template== return val, ok } @@ -1002,7 +1143,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { // {{ end }} ==template== start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1021,7 +1162,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } // ==template== {{ if not .Optimize }} if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } // {{ end }} ==template== return val, ok @@ -1061,7 +1202,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { state := p.cloneState() // {{ end }} ==template== p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() // ==template== {{ if or .GlobalState (not .Optimize) }} p.restoreState(state) @@ -1194,8 +1335,8 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { if p.debug { defer p.out(p.in("parseChoiceExpr")) } - // {{ end }} ==template== + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI @@ -1205,7 +1346,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { // {{ end }} ==template== p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { // ==template== {{ if not .Optimize }} @@ -1231,7 +1372,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) 
(any, bool) { // {{ end }} ==template== p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1299,7 +1440,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { // {{ end }} ==template== p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() // ==template== {{ if or .GlobalState (not .Optimize) }} @@ -1321,7 +1462,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1343,7 +1484,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { // {{ end }} ==template== p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1365,7 +1506,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1382,7 +1523,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { state := p.cloneState() // {{ end }} ==template== for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { // ==template== {{ if or .GlobalState (not .Optimize) }} p.restoreState(state) @@ -1423,7 +1564,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1443,7 +1584,7 @@ func (p 
*parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1460,7 +1601,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { // {{ end }} ==template== p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/doc.go b/doc.go index bfa9acb..e7842ac 100644 --- a/doc.go +++ b/doc.go @@ -89,6 +89,12 @@ The following options can be specified: necessary if the -optimize-parser flag is set, as some rules may be optimized out of the resulting parser. + -support-left-recursion : boolean, (EXPERIMENTAL FEATURE) if set, add support + for left recursion rules, including those with indirect recursion + (default: false). + E.g.: + expr = expr '*' term / expr '+' term + If the code blocks in the grammar (see below, section "Code block") are golint- and go vet-compliant, then the resulting generated code will also be golint- and go vet-compliant. @@ -171,7 +177,7 @@ on the following: For terminals (character and string literals, character classes and the any matcher), the value is []byte. E.g.: - Rule = label:'a' { // label is []byte } + Rule = label:'a' { // label is []byte } For predicates (& and !), the value is always nil. E.g.: Rule = label:&'a' { // label is nil } @@ -325,13 +331,13 @@ If a non-nil error is returned, it is added to the list of errors that the parser will return, note that the parser does NOT backtrack if a non-nil error is returned. 
E.g: - Rule = [a] #{ - c.state["a"]++ - if c.state["a"] > 5 { - return fmt.Errorf("we have seen more than 5 a's") // parser will not backtrack - } - return nil - } + Rule = [a] #{ + c.state["a"]++ + if c.state["a"] > 5 { + return fmt.Errorf("we have seen more than 5 a's") // parser will not backtrack + } + return nil + } The "*current" type is a struct that provides four useful fields that can be accessed in action, state change, and predicate code blocks: "pos", "text", "state" and "globalStore". @@ -382,6 +388,23 @@ of pigeon and should not be used nor modified. Those keys are treated as internal implementation details and therefore there are no guarantees given in regards of API stability. +Left recursion + +With options -support-left-recursion pigeon supports left recursion. E.g.: + expr = expr '*' term +Supports indirect recursion: + A = B / D + B = A / C +The implementation is based on the [Left-recursive PEG Grammars][9] article that +links to [Left Recursion in Parsing Expression Grammars][10] and +[Packrat Parsers Can Support Left Recursion][11] papers. + +References: + + [9]: https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1 + [10]: https://arxiv.org/pdf/1207.0443.pdf + [11]: http://web.cs.ucla.edu/~todd/research/pepm08.pdf + Failure labels, throw and recover pigeon supports an extension of the classical PEG syntax called failure labels, @@ -549,32 +572,32 @@ as the Go 1 compatibility [5]. The following lists what part of the current pigeon code falls under that guarantee (features may be added in the future): - - The pigeon command-line flags and arguments: those will not be removed - and will maintain the same semantics. + - The pigeon command-line flags and arguments: those will not be removed + and will maintain the same semantics. - - The explicitly exported API generated by pigeon. See [6] for the - documentation of this API on a generated parser. + - The explicitly exported API generated by pigeon. 
See [6] for the + documentation of this API on a generated parser. - - The PEG syntax, as documented above. + - The PEG syntax, as documented above. - - The code blocks (except the initializer) will always be generated as - methods on the *current type, and this type is guaranteed to have - the fields pos (type position) and text (type []byte). There are no - guarantees on other fields and methods of this type. + - The code blocks (except the initializer) will always be generated as + methods on the *current type, and this type is guaranteed to have + the fields pos (type position) and text (type []byte). There are no + guarantees on other fields and methods of this type. - - The position type will always have the fields line, col and offset, - all defined as int. There are no guarantees on other fields and methods - of this type. + - The position type will always have the fields line, col and offset, + all defined as int. There are no guarantees on other fields and methods + of this type. - - The type of the error value returned by the Parse* functions, when - not nil, will always be errList defined as a []error. There are no - guarantees on methods of this type, other than the fact it implements the - error interface. + - The type of the error value returned by the Parse* functions, when + not nil, will always be errList defined as a []error. There are no + guarantees on methods of this type, other than the fact it implements the + error interface. - - Individual errors in the errList will always be of type *parserError, - and this type is guaranteed to have an Inner field that contains the - original error value. There are no guarantees on other fields and methods - of this type. + - Individual errors in the errList will always be of type *parserError, + and this type is guaranteed to have an Inner field that contains the + original error value. There are no guarantees on other fields and methods + of this type. 
The above guarantee is given to the version 1.0 (https://github.com/mna/pigeon/releases/tag/v1.0.0) of pigeon, which has entered maintenance mode (bug fixes only). The current @@ -587,8 +610,8 @@ may occur at any time. References: - [5]: https://golang.org/doc/go1compat - [6]: http://godoc.org/github.com/mna/pigeon/test/predicates + [5]: https://golang.org/doc/go1compat + [6]: http://godoc.org/github.com/mna/pigeon/test/predicates */ package main diff --git a/examples/calculator/calculator.go b/examples/calculator/calculator.go index 4f4000a..a9e484d 100644 --- a/examples/calculator/calculator.go +++ b/examples/calculator/calculator.go @@ -1034,14 +1034,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1243,7 +1248,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1284,37 +1289,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer 
p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1326,6 +1346,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1373,9 +1403,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1385,7 +1412,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1399,7 +1426,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + 
p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1428,7 +1455,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1546,7 +1573,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1564,7 +1591,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1620,7 +1647,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1638,7 +1665,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1657,7 +1684,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1677,7 +1704,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1690,7 +1717,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := 
range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1720,7 +1747,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1738,7 +1765,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1753,7 +1780,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/examples/indentation/indentation.go b/examples/indentation/indentation.go index 57a4d48..acab50b 100644 --- a/examples/indentation/indentation.go +++ b/examples/indentation/indentation.go @@ -1347,14 +1347,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1556,7 +1561,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1597,37 +1602,52 @@ func listJoin(list []string, 
sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1639,6 +1659,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1686,9 +1716,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - 
} return val, ok } @@ -1698,7 +1725,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1712,7 +1739,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1741,7 +1768,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1859,7 +1886,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1877,7 +1904,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1933,7 +1960,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1951,7 +1978,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1970,7 +1997,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := 
p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1990,7 +2017,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -2003,7 +2030,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -2033,7 +2060,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -2051,7 +2078,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -2066,7 +2093,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/examples/json/json.go b/examples/json/json.go index 6e6b17b..c7c1b33 100644 --- a/examples/json/json.go +++ b/examples/json/json.go @@ -1329,14 +1329,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + 
return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1538,7 +1543,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1579,37 +1584,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1621,6 +1641,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + 
p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1668,9 +1698,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1680,7 +1707,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1694,7 +1721,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1723,7 +1750,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1841,7 +1868,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1859,7 +1886,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1915,7 +1942,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1933,7 +1960,7 
@@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1952,7 +1979,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1972,7 +1999,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1985,7 +2012,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -2015,7 +2042,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -2033,7 +2060,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -2048,7 +2075,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/examples/json/optimized-grammar/json.go b/examples/json/optimized-grammar/json.go index 2dff3d4..bda15bd 100644 --- a/examples/json/optimized-grammar/json.go +++ 
b/examples/json/optimized-grammar/json.go @@ -1506,14 +1506,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1715,7 +1720,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1756,37 +1761,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] 
- if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1798,6 +1818,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1845,9 +1875,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1857,7 +1884,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1871,7 +1898,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1900,7 +1927,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -2018,7 +2045,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -2036,7 +2063,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, 
bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -2092,7 +2119,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -2110,7 +2137,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -2129,7 +2156,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -2149,7 +2176,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -2162,7 +2189,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -2192,7 +2219,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -2210,7 +2237,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := 
p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -2225,7 +2252,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/examples/json/optimized/json.go b/examples/json/optimized/json.go index 013a8c0..5e6ae77 100644 --- a/examples/json/optimized/json.go +++ b/examples/json/optimized/json.go @@ -1361,7 +1361,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1402,18 +1402,34 @@ func listJoin(list []string, sep string, lastSep string) string { } } +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + val, ok = p.parseRule(rule) + + return val, ok +} + func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] return val, ok } +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + // nolint: gocyclo func (p *parser) parseExpr(expr any) (any, bool) { - p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1464,7 +1480,7 @@ func (p *parser) parseExpr(expr any) (any, bool) { func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1491,7 +1507,7 @@ func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt p.pushV() - _, ok := 
p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restore(pt) @@ -1584,12 +1600,13 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { } func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { return val, ok @@ -1600,7 +1617,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1640,7 +1657,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { pt := p.pt p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restore(pt) @@ -1653,7 +1670,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1669,7 +1686,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1685,7 +1702,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1693,7 +1710,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt for _, 
expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restore(pt) return nil, false @@ -1707,7 +1724,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1721,7 +1738,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1732,7 +1749,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/main.go b/main.go index e1d3423..582c163 100644 --- a/main.go +++ b/main.go @@ -51,6 +51,7 @@ func main() { optimizeParserFlag = fs.Bool("optimize-parser", false, "generate optimized parser without Debug and Memoize options") recvrNmFlag = fs.String("receiver-name", "c", "receiver name for the generated methods") noBuildFlag = fs.Bool("x", false, "do not build, only parse") + supportLeftRecursion = fs.Bool("support-left-recursion", false, "add support left recursion (EXPERIMENTAL FEATURE)") altEntrypointsFlag ruleNamesFlag ) @@ -136,7 +137,10 @@ func main() { optimizeParser := builder.Optimize(*optimizeParserFlag) basicLatinOptimize := builder.BasicLatinLookupTable(*optimizeBasicLatinFlag) nolintOpt := builder.Nolint(*nolint) - if err := builder.BuildParser(outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, nolintOpt); err != nil { + leftRecursionSupporter := builder.SupportLeftRecursion(*supportLeftRecursion) + if err := builder.BuildParser( + outBuf, grammar, 
curNmOpt, optimizeParser, basicLatinOptimize, + nolintOpt, leftRecursionSupporter); err != nil { fmt.Fprintln(os.Stderr, "build error: ", err) exit(5) } @@ -207,6 +211,8 @@ the generated code is written to this file instead. comma-separated list of rule names that may be used as alternate entrypoints for the parser, in addition to the first rule in the grammar. + -support-left-recursion + add support left recursion (EXPERIMENTAL FEATURE) See https://godoc.org/github.com/mna/pigeon for more information. ` diff --git a/pigeon.go b/pigeon.go index b38c26d..a49c5c2 100644 --- a/pigeon.go +++ b/pigeon.go @@ -3785,14 +3785,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -3994,7 +3999,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -4035,37 +4040,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + 
val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -4077,6 +4097,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -4124,9 +4154,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -4136,7 +4163,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -4150,7 +4177,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) 
} return val, ok } @@ -4179,7 +4206,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -4297,7 +4324,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -4315,7 +4342,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -4371,7 +4398,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -4389,7 +4416,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -4408,7 +4435,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -4428,7 +4455,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -4441,7 +4468,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, 
ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -4471,7 +4498,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -4489,7 +4516,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -4504,7 +4531,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/targeted_test.go b/targeted_test.go index de2ee9b..582d7d5 100644 --- a/targeted_test.go +++ b/targeted_test.go @@ -765,8 +765,7 @@ func TestParseChoiceExpr(t *testing.T) { p := newParser("", []byte(tc.in)) // add dummy rule to rule stack of parser - r := rule{name: "dummy"} - p.rstack = append(p.rstack, &r) + p.rstack = append(p.rstack, &rule{name: "dummy"}) // advance to the first rune p.read() diff --git a/test/alternate_entrypoint/altentry.go b/test/alternate_entrypoint/altentry.go index fbe986e..6945c16 100644 --- a/test/alternate_entrypoint/altentry.go +++ b/test/alternate_entrypoint/altentry.go @@ -803,14 +803,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) 
addErr(err error) { @@ -1012,7 +1017,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1053,37 +1058,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1095,6 +1115,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } 
+ return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1142,9 +1172,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1154,7 +1181,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1168,7 +1195,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1197,7 +1224,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1315,7 +1342,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1333,7 +1360,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1389,7 +1416,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1407,7 +1434,7 @@ func (p *parser) parseOneOrMoreExpr(expr 
*oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1426,7 +1453,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1446,7 +1473,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1459,7 +1486,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1489,7 +1516,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1507,7 +1534,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1522,7 +1549,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/andnot/andnot.go b/test/andnot/andnot.go index b0da4b8..98fad74 100644 --- a/test/andnot/andnot.go +++ b/test/andnot/andnot.go @@ -743,14 +743,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) 
printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -952,7 +957,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -993,37 +998,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, 
resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1035,6 +1055,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1082,9 +1112,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1094,7 +1121,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1108,7 +1135,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1137,7 +1164,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1255,7 +1282,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1273,7 +1300,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := 
p.vstack[len(p.vstack)-1] @@ -1329,7 +1356,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1347,7 +1374,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1366,7 +1393,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1386,7 +1413,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1399,7 +1426,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1429,7 +1456,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1447,7 +1474,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1462,7 +1489,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } 
p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/emptystate/emptystate.go b/test/emptystate/emptystate.go index e4573af..8f1ec33 100644 --- a/test/emptystate/emptystate.go +++ b/test/emptystate/emptystate.go @@ -803,14 +803,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1012,7 +1017,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1053,37 +1058,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + 
p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1095,6 +1115,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1142,9 +1172,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1154,7 +1181,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1168,7 +1195,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1197,7 +1224,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ 
-1315,7 +1342,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1333,7 +1360,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1389,7 +1416,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1407,7 +1434,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1426,7 +1453,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1446,7 +1473,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1459,7 +1486,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1489,7 +1516,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; 
ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1507,7 +1534,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1522,7 +1549,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/errorpos/errorpos.go b/test/errorpos/errorpos.go index 0cda481..f52b598 100644 --- a/test/errorpos/errorpos.go +++ b/test/errorpos/errorpos.go @@ -1182,14 +1182,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1391,7 +1396,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1432,37 +1437,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, 
p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1474,6 +1494,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1521,9 +1551,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1533,7 +1560,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1547,7 +1574,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = 
actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1576,7 +1603,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1694,7 +1721,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1712,7 +1739,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1768,7 +1795,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1786,7 +1813,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1805,7 +1832,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1825,7 +1852,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1838,7 +1865,7 @@ func 
(p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1868,7 +1895,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1886,7 +1913,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1901,7 +1928,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/global_store/global_store.go b/test/global_store/global_store.go index 8768cde..5cce23f 100644 --- a/test/global_store/global_store.go +++ b/test/global_store/global_store.go @@ -764,14 +764,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -973,7 +978,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors 
have been recorded, the expected values @@ -1014,37 +1019,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1056,6 +1076,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1103,9 +1133,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression 
type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1115,7 +1142,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1129,7 +1156,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1158,7 +1185,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1276,7 +1303,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1294,7 +1321,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1350,7 +1377,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1368,7 +1395,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1387,7 +1414,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } 
p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1407,7 +1434,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1420,7 +1447,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1450,7 +1477,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1468,7 +1495,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1483,7 +1510,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/goto/goto.go b/test/goto/goto.go index 94e07c1..f6b165e 100644 --- a/test/goto/goto.go +++ b/test/goto/goto.go @@ -981,14 +981,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) 
out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1190,7 +1195,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1231,37 +1236,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1273,6 +1293,16 @@ func (p *parser) parseExpr(expr 
any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1320,9 +1350,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1332,7 +1359,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1346,7 +1373,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1375,7 +1402,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1493,7 +1520,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1511,7 +1538,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1567,7 +1594,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) 
p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1585,7 +1612,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1604,7 +1631,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1624,7 +1651,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1637,7 +1664,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1667,7 +1694,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1685,7 +1712,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1700,7 +1727,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/goto_state/goto_state.go b/test/goto_state/goto_state.go index 3797cb4..a4a10f2 
100644 --- a/test/goto_state/goto_state.go +++ b/test/goto_state/goto_state.go @@ -1009,14 +1009,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1218,7 +1223,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1259,37 +1264,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() 
p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1301,6 +1321,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1348,9 +1378,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1360,7 +1387,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1374,7 +1401,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1403,7 +1430,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1521,7 +1548,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1539,7 +1566,7 @@ func (p *parser) 
parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1595,7 +1622,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1613,7 +1640,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1632,7 +1659,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1652,7 +1679,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1665,7 +1692,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1695,7 +1722,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1713,7 +1740,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := 
p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1728,7 +1755,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_1/issue_1.go b/test/issue_1/issue_1.go index 66f1b7a..aa6803c 100644 --- a/test/issue_1/issue_1.go +++ b/test/issue_1/issue_1.go @@ -683,14 +683,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -892,7 +897,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -933,37 +938,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - 
p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -975,6 +995,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1022,9 +1052,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1034,7 +1061,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1048,7 +1075,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1077,7 +1104,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt 
:= p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1195,7 +1222,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1213,7 +1240,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1269,7 +1296,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1287,7 +1314,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1306,7 +1333,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1326,7 +1353,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1339,7 +1366,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1369,7 +1396,7 @@ func 
(p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1387,7 +1414,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1402,7 +1429,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_18/issue_18.go b/test/issue_18/issue_18.go index ba7e9e7..087e7a2 100644 --- a/test/issue_18/issue_18.go +++ b/test/issue_18/issue_18.go @@ -700,14 +700,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -909,7 +914,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -950,37 +955,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + 
p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -992,6 +1012,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1039,9 +1069,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1051,7 +1078,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { 
p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1065,7 +1092,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1094,7 +1121,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1212,7 +1239,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1230,7 +1257,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1286,7 +1313,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1304,7 +1331,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1323,7 +1350,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1343,7 +1370,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, 
false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1356,7 +1383,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1386,7 +1413,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1404,7 +1431,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1419,7 +1446,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_65/issue_65.go b/test/issue_65/issue_65.go index b41c24f..de35eba 100644 --- a/test/issue_65/issue_65.go +++ b/test/issue_65/issue_65.go @@ -699,14 +699,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -908,7 +913,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = 
p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -949,37 +954,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -991,6 +1011,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { 
panic(errMaxExprCnt) @@ -1038,9 +1068,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1050,7 +1077,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1064,7 +1091,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1093,7 +1120,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1211,7 +1238,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1229,7 +1256,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1285,7 +1312,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1303,7 +1330,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if 
len(vals) == 0 { @@ -1322,7 +1349,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1342,7 +1369,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1355,7 +1382,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1385,7 +1412,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1403,7 +1430,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1418,7 +1445,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_65/optimized-grammar/issue_65.go b/test/issue_65/optimized-grammar/issue_65.go index 2cbacf1..b828da0 100644 --- a/test/issue_65/optimized-grammar/issue_65.go +++ b/test/issue_65/optimized-grammar/issue_65.go @@ -685,14 +685,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return 
p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -894,7 +899,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -935,37 +940,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// 
nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -977,6 +997,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1024,9 +1054,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1036,7 +1063,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1050,7 +1077,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1079,7 +1106,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1197,7 +1224,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1215,7 +1242,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1271,7 +1298,7 @@ func (p *parser) 
parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1289,7 +1316,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1308,7 +1335,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1328,7 +1355,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1341,7 +1368,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1371,7 +1398,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1389,7 +1416,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1404,7 +1431,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := 
p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_65/optimized/issue_65.go b/test/issue_65/optimized/issue_65.go index 81f882b..6d5717b 100644 --- a/test/issue_65/optimized/issue_65.go +++ b/test/issue_65/optimized/issue_65.go @@ -724,7 +724,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -765,18 +765,34 @@ func listJoin(list []string, sep string, lastSep string) string { } } +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + val, ok = p.parseRule(rule) + + return val, ok +} + func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] return val, ok } +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + // nolint: gocyclo func (p *parser) parseExpr(expr any) (any, bool) { - p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -827,7 +843,7 @@ func (p *parser) parseExpr(expr any) (any, bool) { func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -854,7 +870,7 @@ func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restore(pt) @@ -947,12 +963,13 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { } func (p *parser) 
parseChoiceExpr(ch *choiceExpr) (any, bool) { + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { return val, ok @@ -963,7 +980,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1003,7 +1020,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { pt := p.pt p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restore(pt) @@ -1016,7 +1033,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1032,7 +1049,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1048,7 +1065,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1056,7 +1073,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restore(pt) return nil, false @@ -1070,7 +1087,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) 
(any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1084,7 +1101,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1095,7 +1112,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_70/issue_70.go b/test/issue_70/issue_70.go index 4e2b1c3..cc80b50 100644 --- a/test/issue_70/issue_70.go +++ b/test/issue_70/issue_70.go @@ -674,14 +674,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -883,7 +888,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -924,37 +929,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := 
p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -966,6 +986,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1013,9 +1043,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1025,7 +1052,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := 
p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1039,7 +1066,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1068,7 +1095,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1186,7 +1213,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1204,7 +1231,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1260,7 +1287,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1278,7 +1305,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1297,7 +1324,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1317,7 +1344,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined 
rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1330,7 +1357,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1360,7 +1387,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1378,7 +1405,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1393,7 +1420,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_70/optimized-grammar/issue_70.go b/test/issue_70/optimized-grammar/issue_70.go index 59ff153..0821265 100644 --- a/test/issue_70/optimized-grammar/issue_70.go +++ b/test/issue_70/optimized-grammar/issue_70.go @@ -658,14 +658,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -867,7 +872,7 @@ func (p 
*parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -908,37 +913,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -950,6 +970,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p 
*parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -997,9 +1027,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1009,7 +1036,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1023,7 +1050,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1052,7 +1079,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1170,7 +1197,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1188,7 +1215,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1244,7 +1271,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1262,7 +1289,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := 
p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1281,7 +1308,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1301,7 +1328,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1314,7 +1341,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1344,7 +1371,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1362,7 +1389,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1377,7 +1404,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_70/optimized/issue_70.go b/test/issue_70/optimized/issue_70.go index b7012fd..09b9533 100644 --- a/test/issue_70/optimized/issue_70.go +++ b/test/issue_70/optimized/issue_70.go @@ -699,7 +699,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to 
first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -740,18 +740,34 @@ func listJoin(list []string, sep string, lastSep string) string { } } +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + val, ok = p.parseRule(rule) + + return val, ok +} + func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] return val, ok } +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + // nolint: gocyclo func (p *parser) parseExpr(expr any) (any, bool) { - p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -802,7 +818,7 @@ func (p *parser) parseExpr(expr any) (any, bool) { func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -829,7 +845,7 @@ func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restore(pt) @@ -922,12 +938,13 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { } func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { return val, ok @@ -938,7 +955,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { p.pushV() 
- val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -978,7 +995,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { pt := p.pt p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restore(pt) @@ -991,7 +1008,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1007,7 +1024,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1023,7 +1040,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1031,7 +1048,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restore(pt) return nil, false @@ -1045,7 +1062,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1059,7 +1076,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) 
p.popV() if !ok { return vals, true @@ -1070,7 +1087,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_70b/issue_70b.go b/test/issue_70b/issue_70b.go index d6e5b69..033621c 100644 --- a/test/issue_70b/issue_70b.go +++ b/test/issue_70b/issue_70b.go @@ -32,6 +32,8 @@ var g = &grammar{ }, }, }, + leader: false, + leftRecursive: false, }, { name: "D", @@ -55,6 +57,8 @@ var g = &grammar{ }, }, }, + leader: true, + leftRecursive: true, }, }, } @@ -316,6 +320,9 @@ type rule struct { name string displayName string expr any + + leader bool + leftRecursive bool } // nolint: structcheck @@ -548,6 +555,11 @@ type Stats struct { ChoiceAltCnt map[string]map[string]int } +type ruleWithExpsStack struct { + rule *rule + estack []any +} + // nolint: structcheck,maligned type parser struct { filename string @@ -659,14 +671,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -868,7 +885,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -909,40 +926,110 @@ func listJoin(list []string, sep string, lastSep string) string { } } 
-func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + if p.debug { - defer p.out(p.in("parseRule " + rule.name)) + defer p.out(p.in("recursive " + rule.name)) } - if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ } - start := p.pt + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = 
p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint - if p.memoize { + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { res, ok := p.getMemoized(expr) if ok { p.restore(res.end) @@ -951,6 +1038,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -998,9 +1095,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1010,7 +1104,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1024,7 +1118,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", 
string(p.sliceFrom(start))) } return val, ok } @@ -1053,7 +1147,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1171,7 +1265,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1189,7 +1283,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1245,7 +1339,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1263,7 +1357,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1282,7 +1376,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1302,7 +1396,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1315,7 +1409,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok 
:= p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1345,7 +1439,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1363,7 +1457,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1378,7 +1472,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/issue_79/issue_79.go b/test/issue_79/issue_79.go new file mode 100644 index 0000000..3d9f8c0 --- /dev/null +++ b/test/issue_79/issue_79.go @@ -0,0 +1,1591 @@ +// Code generated by pigeon; DO NOT EDIT. 
+ +package issue79 + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "Input", + pos: position{line: 5, col: 1, offset: 22}, + expr: &actionExpr{ + pos: position{line: 5, col: 10, offset: 31}, + run: (*parser).callonInput1, + expr: &seqExpr{ + pos: position{line: 5, col: 10, offset: 31}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 5, col: 10, offset: 31}, + label: "expr", + expr: &ruleRefExpr{ + pos: position{line: 5, col: 15, offset: 36}, + name: "Expr", + }, + }, + &ruleRefExpr{ + pos: position{line: 5, col: 20, offset: 41}, + name: "EOF", + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "Expr", + pos: position{line: 9, col: 1, offset: 68}, + expr: &choiceExpr{ + pos: position{line: 9, col: 9, offset: 76}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 9, col: 9, offset: 76}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 9, col: 9, offset: 76}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 11, offset: 78}, + name: "Expr", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 16, offset: 83}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 18, offset: 85}, + name: "LogicOp", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 26, offset: 93}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 28, offset: 95}, + name: "Expr", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 33, offset: 100}, + name: "_", + }, + }, + }, + &seqExpr{ + pos: position{line: 9, col: 36, offset: 103}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 9, col: 36, offset: 103}, + name: "_", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 38, offset: 105}, + name: "Value", + }, + &ruleRefExpr{ + pos: position{line: 9, col: 44, offset: 111}, + name: "_", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "LogicOp", + 
pos: position{line: 11, col: 1, offset: 114}, + expr: &actionExpr{ + pos: position{line: 11, col: 12, offset: 125}, + run: (*parser).callonLogicOp1, + expr: &choiceExpr{ + pos: position{line: 11, col: 13, offset: 126}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 11, col: 13, offset: 126}, + val: "and", + ignoreCase: false, + want: "\"and\"", + }, + &litMatcher{ + pos: position{line: 11, col: 21, offset: 134}, + val: "or", + ignoreCase: false, + want: "\"or\"", + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "Value", + pos: position{line: 15, col: 1, offset: 173}, + expr: &actionExpr{ + pos: position{line: 15, col: 10, offset: 182}, + run: (*parser).callonValue1, + expr: &oneOrMoreExpr{ + pos: position{line: 15, col: 10, offset: 182}, + expr: &charClassMatcher{ + pos: position{line: 15, col: 10, offset: 182}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "_", + displayName: "\"whitespace\"", + pos: position{line: 19, col: 1, offset: 221}, + expr: &zeroOrMoreExpr{ + pos: position{line: 19, col: 19, offset: 239}, + expr: &charClassMatcher{ + pos: position{line: 19, col: 19, offset: 239}, + val: "[ \\n\\t\\r]", + chars: []rune{' ', '\n', '\t', '\r'}, + ignoreCase: false, + inverted: false, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "EOF", + pos: position{line: 21, col: 1, offset: 251}, + expr: ¬Expr{ + pos: position{line: 21, col: 8, offset: 258}, + expr: &anyMatcher{ + line: 21, col: 9, offset: 259, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onInput1(expr any) (any, error) { + return expr, nil +} + +func (p *parser) callonInput1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onInput1(stack["expr"]) +} + +func (c *current) onLogicOp1() (any, error) { + return string(c.text), nil +} + +func (p *parser) 
callonLogicOp1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onLogicOp1() +} + +func (c *current) onValue1() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonValue1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onValue1() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. 
+func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. +// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. 
+// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) 
+} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 

// grammar is the root of the generated grammar: the list of its rules.
// The first rule is the default entrypoint of the parser.
type grammar struct {
	pos   position
	rules []*rule
}

// rule is a named grammar rule.
type rule struct {
	pos         position
	name        string // key of the rule in the parser's rules table
	displayName string // when non-empty, used instead of name in error prefixes
	expr        any    // expression evaluated when the rule is parsed

	// leader selects the iterative left-recursion loop for this rule;
	// leftRecursive disables expression memoization while the rule is the
	// current one.
	leader        bool
	leftRecursive bool
}

// choiceExpr is an ordered choice: alternatives are tried in order and the
// first one that matches wins.
type choiceExpr struct {
	pos          position
	alternatives []any
}

// actionExpr runs action code once expr has matched; the value returned by
// run replaces the value of the match, and an error from run is recorded.
type actionExpr struct {
	pos  position
	expr any
	run  func(*parser) (any, error)
}

// recoveryExpr makes recoverExpr available, under each of the failureLabel
// names, to throw expressions evaluated while expr is parsed.
type recoveryExpr struct {
	pos          position
	expr         any
	recoverExpr  any
	failureLabel []string
}

// seqExpr matches when every expression in exprs matches in order; its
// value is the slice of the individual values.
type seqExpr struct {
	pos   position
	exprs []any
}

// throwExpr looks label up in the recovery stack, innermost entry first,
// and parses the recovery expression registered for it.
type throwExpr struct {
	pos   position
	label string
}

// labeledExpr stores the value of expr under label (when label is not
// empty) in the current variable set so action code can receive it.
type labeledExpr struct {
	pos   position
	label string
	expr  any
}

// expr is the common shape of the single-operand expression types below.
type expr struct {
	pos  position
	expr any
}

type (
	andExpr        expr // matches if expr matches; position and state are restored
	notExpr        expr // matches if expr does not match; position and state are restored
	zeroOrOneExpr  expr // always matches, whether expr matched or not
	zeroOrMoreExpr expr // always matches; collects values until expr fails
	oneOrMoreExpr  expr // like zeroOrMoreExpr but requires at least one match
)

// ruleRefExpr refers to another rule by name; the name is resolved through
// the parser's rules table when the expression is parsed.
type ruleRefExpr struct {
	pos  position
	name string
}

// stateCodeExpr runs state-changing code; an error is recorded but the
// expression always matches.
type stateCodeExpr struct {
	pos position
	run func(*parser) error
}

// andCodeExpr matches when run returns true; the state is restored after
// the call.
type andCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// notCodeExpr matches when run returns false; the state is restored after
// the call.
type notCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// litMatcher matches the literal val rune by rune.
type litMatcher struct {
	pos        position
	val        string
	ignoreCase bool   // input runes are lowercased before being compared with val
	want       string // text reported as the expected value when recording failures
}

// charClassMatcher matches a single rune against a character class.
type charClassMatcher struct {
	pos             position
	val             string    // source text of the class, reported when recording failures
	basicLatinChars [128]bool // NOTE(review): not read by the parse code in this file — presumably used by optimized parsers; confirm
	chars           []rune    // individual runes of the class
	ranges          []rune    // consecutive (low, high) pairs, both bounds inclusive
	classes         []*unicode.RangeTable
	ignoreCase      bool // the input rune is lowercased before matching
	inverted        bool // match succeeds when the rune is NOT in the class
}

// anyMatcher matches any single rune and fails only at the end of input.
type anyMatcher position

// errList cumulates the errors found by the parser.
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +type resultTuple struct { + v any + b bool + end savepoint +} + +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + if p.debug { + defer p.out(p.in("recursive " + rule.name)) + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + 
endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := 
p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func (p *parser) 
parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, 
cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + 
return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + state := p.cloneState() + for _, 
expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/issue_79/issue_79.peg b/test/issue_79/issue_79.peg new file mode 100644 index 0000000..1e0b289 --- /dev/null +++ b/test/issue_79/issue_79.peg @@ -0,0 +1,21 @@ +{ + package issue79 +} + +Input <- expr:Expr EOF { + return expr, nil +} + +Expr <- _ Expr _ LogicOp _ Expr _/ _ Value _ + +LogicOp <- ("and" / "or") { + return string(c.text), nil +} + +Value <- [0-9]+ { + return string(c.text),nil +} + +_ "whitespace" <- [ \n\t\r]* + +EOF <- !. 
diff --git a/test/issue_80/issue_80.go b/test/issue_80/issue_80.go index 2843b0e..404f7e7 100644 --- a/test/issue_80/issue_80.go +++ b/test/issue_80/issue_80.go @@ -693,14 +693,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -902,7 +907,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -943,37 +948,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok 
:= p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -985,6 +1005,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1032,9 +1062,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1044,7 +1071,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1058,7 +1085,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1087,7 +1114,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1205,7 +1232,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if 
ok { p.incChoiceAltCnt(ch, altI) @@ -1223,7 +1250,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1279,7 +1306,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1297,7 +1324,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1316,7 +1343,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1336,7 +1363,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1349,7 +1376,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1379,7 +1406,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1397,7 +1424,7 @@ func (p *parser) parseZeroOrMoreExpr(expr 
*zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1412,7 +1439,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/labeled_failures/labeled_failures.go b/test/labeled_failures/labeled_failures.go index 2c2e6fa..de8be5b 100644 --- a/test/labeled_failures/labeled_failures.go +++ b/test/labeled_failures/labeled_failures.go @@ -922,14 +922,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1131,7 +1136,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1172,37 +1177,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule 
" + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1214,6 +1234,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1261,9 +1291,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1273,7 +1300,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1287,7 +1314,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", 
string(p.sliceFrom(start))) } return val, ok } @@ -1316,7 +1343,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1434,7 +1461,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1452,7 +1479,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1508,7 +1535,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1526,7 +1553,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1545,7 +1572,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1565,7 +1592,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1578,7 +1605,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok 
:= p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1608,7 +1635,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1626,7 +1653,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1641,7 +1668,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/left_recursion/left_recursion.peg b/test/left_recursion/left_recursion.peg new file mode 100644 index 0000000..fc20780 --- /dev/null +++ b/test/left_recursion/left_recursion.peg @@ -0,0 +1,38 @@ +{ + package leftrecursion +} + +start = a:expr !. 
{ + return a, nil +} + +expr = a:expr op:('+' / '-') b:term { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil +} / a:term { + strA := a.(string) + return strA, nil +} + +term = a:term op:('*' / '/' / '%') b:factor { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil + +} / a:factor { + strA := a.(string) + return strA, nil +} + +factor = op:('+' / '-') a:factor { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} / atom { + return string(c.text), nil +} + +atom = [0-9]+ diff --git a/test/left_recursion/left_recursion_test.go b/test/left_recursion/left_recursion_test.go new file mode 100644 index 0000000..3bf4179 --- /dev/null +++ b/test/left_recursion/left_recursion_test.go @@ -0,0 +1,264 @@ +package leftrecursion_test + +import ( + "testing" + + "github.com/mna/pigeon/test/left_recursion/standart/leftrecursion" + "github.com/mna/pigeon/test/left_recursion/standart/withoutleftrecursion" + + optimizedleftrecursion "github.com/mna/pigeon/test/left_recursion/optimized/leftrecursion" + optimizedwithoutleftrecursion "github.com/mna/pigeon/test/left_recursion/optimized/withoutleftrecursion" +) + +func TestLeftRecursionParse(t *testing.T) { + t.Parallel() + + type want struct { + expr string + } + + tests := []struct { + name string + expr string + want want + }{ + { + name: "Complex", + expr: "7+10/2*-4+5*3%6-8*6", + want: want{expr: "(((7+((10/2)*(-4)))+((5*3)%6))-(8*6))"}, + }, + { + name: "Simple", + expr: "2*1+7", + want: want{expr: "((2*1)+7)"}, + }, + { + name: "Simple revers", + expr: "2+1*7", + want: want{expr: "(2+(1*7))"}, + }, + { + name: "Same operations", + expr: "2+1+7", + want: want{expr: "((2+1)+7)"}, + }, + { + name: "Start with unary minus", + expr: "-2+1", + want: want{expr: "((-2)+1)"}, + }, + { + name: "unary minus between + and *", + expr: "2+-7*-1", + want: want{expr: 
"(2+((-7)*(-1)))"}, + }, + } + + for _, testCase := range tests { + testCase := testCase + + setOptionsLR := map[string][]leftrecursion.Option{ + "memoize": {leftrecursion.Memoize(true)}, + "-": {}, + } + for nameOptionsLR, optionsLR := range setOptionsLR { + optionsLR := optionsLR + + t.Run( + testCase.name+" default(recursion). Options: "+nameOptionsLR, + func(t *testing.T) { + t.Parallel() + + resLR, err := leftrecursion.Parse( + "", []byte(testCase.expr), optionsLR...) + if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + exprLR, ok := resLR.(string) + if !ok { + t.FailNow() + } + if exprLR != testCase.want.expr { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + testCase.expr, exprLR, testCase.want.expr) + } + }) + } + + setOptions := map[string][]withoutleftrecursion.Option{ + "memoize": {withoutleftrecursion.Memoize(true)}, + "-": {}, + } + for nameOptions, options := range setOptions { + options := options + + t.Run(testCase.name+" default(without recursion). Options: "+nameOptions, func(t *testing.T) { + t.Parallel() + + res, err := withoutleftrecursion.Parse("", []byte(testCase.expr), options...) 
+ if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + expr, ok := res.(string) + if !ok { + t.FailNow() + } + if expr != testCase.want.expr { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + testCase.expr, expr, testCase.want.expr) + } + }) + } + + t.Run(testCase.name+" optimized(recursion)", func(t *testing.T) { + t.Parallel() + + resLR, err := optimizedleftrecursion.Parse("", []byte(testCase.expr)) + if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + exprLR, ok := resLR.(string) + if !ok { + t.FailNow() + } + if exprLR != testCase.want.expr { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + testCase.expr, exprLR, testCase.want.expr) + } + }) + + t.Run(testCase.name+" optimized(without recursion)", func(t *testing.T) { + t.Parallel() + + res, err := optimizedwithoutleftrecursion.Parse("", []byte(testCase.expr)) + if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + expr, ok := res.(string) + if !ok { + t.FailNow() + } + if expr != testCase.want.expr { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + testCase.expr, expr, testCase.want.expr) + } + }) + } +} + +func FuzzLeftRecursionParse(f *testing.F) { + chars := []byte("0123456789+-/*%") + + f.Fuzz(func(t *testing.T, bytes []byte) { + data := make([]byte, 0, len(bytes)) + for _, b := range bytes { + data = append(data, chars[int(b)%len(chars)]) + } + resLR, errLR := leftrecursion.Parse("", data) + res, err := withoutleftrecursion.Parse("", data) + if err != nil || errLR != nil { + if err == nil || errLR == nil { + t.Fatalf( + "for input %q\ngot error: %q,\nbut expect: %q", + data, errLR, err) + } + return + } + exprLR, okLR := resLR.(string) + if !okLR { + t.FailNow() + } + expr, ok := res.(string) + if !ok { + t.FailNow() + } + if expr != exprLR { + 
t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + data, exprLR, expr) + } + }) +} + +func FuzzLeftRecursionParseMemoize(f *testing.F) { + chars := []byte("0123456789+-/*%") + + f.Fuzz(func(t *testing.T, bytes []byte) { + data := make([]byte, 0, len(bytes)) + for _, b := range bytes { + data = append(data, chars[int(b)%len(chars)]) + } + + resLR, errLR := leftrecursion.Parse( + "", data, leftrecursion.Memoize(true)) + res, err := withoutleftrecursion.Parse( + "", data, withoutleftrecursion.Memoize(true)) + if err != nil || errLR != nil { + if err == nil || errLR == nil { + t.Fatalf( + "for input %q\ngot error: %q,\nbut expect: %q", + data, errLR, err) + } + return + } + exprLR, okLR := resLR.(string) + if !okLR { + t.FailNow() + } + expr, ok := res.(string) + if !ok { + t.FailNow() + } + if expr != exprLR { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + data, exprLR, expr) + } + }) +} + +func FuzzLeftRecursionParseOptimized(f *testing.F) { + chars := []byte("0123456789+-/*%") + + f.Fuzz(func(t *testing.T, bytes []byte) { + data := make([]byte, 0, len(bytes)) + for _, b := range bytes { + data = append(data, chars[int(b)%len(chars)]) + } + resLR, errLR := optimizedleftrecursion.Parse("", data) + res, err := optimizedwithoutleftrecursion.Parse("", data) + if err != nil || errLR != nil { + if err == nil || errLR == nil { + t.Fatalf( + "for input %q\ngot error: %q,\nbut expect: %q", + data, errLR, err) + } + return + } + exprLR, okLR := resLR.(string) + if !okLR { + t.FailNow() + } + expr, ok := res.(string) + if !ok { + t.FailNow() + } + if expr != exprLR { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + data, exprLR, expr) + } + }) +} diff --git a/test/left_recursion/optimized/leftrecursion/left_recursion.go b/test/left_recursion/optimized/leftrecursion/left_recursion.go new file mode 100644 index 0000000..b61a0fd --- /dev/null +++ b/test/left_recursion/optimized/leftrecursion/left_recursion.go @@ -0,0 +1,1419 @@ +// 
Code generated by pigeon; DO NOT EDIT. + +package leftrecursion + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 5, col: 1, offset: 29}, + expr: &actionExpr{ + pos: position{line: 5, col: 9, offset: 37}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 5, col: 9, offset: 37}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 5, col: 9, offset: 37}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 5, col: 11, offset: 39}, + name: "expr", + }, + }, + ¬Expr{ + pos: position{line: 5, col: 16, offset: 44}, + expr: &anyMatcher{ + line: 5, col: 17, offset: 45, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "expr", + pos: position{line: 9, col: 1, offset: 67}, + expr: &choiceExpr{ + pos: position{line: 9, col: 9, offset: 75}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 9, col: 9, offset: 75}, + run: (*parser).callonexpr2, + expr: &seqExpr{ + pos: position{line: 9, col: 9, offset: 75}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 9, col: 9, offset: 75}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 9, col: 11, offset: 77}, + name: "expr", + }, + }, + &labeledExpr{ + pos: position{line: 9, col: 16, offset: 82}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 9, col: 20, offset: 86}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 9, col: 20, offset: 86}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 9, col: 26, offset: 92}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 9, col: 31, offset: 97}, + label: "b", + expr: &ruleRefExpr{ + pos: position{line: 9, col: 33, offset: 99}, + name: "term", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 14, col: 5, offset: 237}, + 
run: (*parser).callonexpr12, + expr: &labeledExpr{ + pos: position{line: 14, col: 5, offset: 237}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 14, col: 7, offset: 239}, + name: "term", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "term", + pos: position{line: 19, col: 1, offset: 293}, + expr: &choiceExpr{ + pos: position{line: 19, col: 8, offset: 300}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 19, col: 8, offset: 300}, + run: (*parser).callonterm2, + expr: &seqExpr{ + pos: position{line: 19, col: 8, offset: 300}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 19, col: 8, offset: 300}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 19, col: 10, offset: 302}, + name: "term", + }, + }, + &labeledExpr{ + pos: position{line: 19, col: 15, offset: 307}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 19, col: 19, offset: 311}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 19, col: 19, offset: 311}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 19, col: 25, offset: 317}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 19, col: 31, offset: 323}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 19, col: 36, offset: 328}, + label: "b", + expr: &ruleRefExpr{ + pos: position{line: 19, col: 38, offset: 330}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 25, col: 5, offset: 472}, + run: (*parser).callonterm13, + expr: &labeledExpr{ + pos: position{line: 25, col: 5, offset: 472}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 25, col: 7, offset: 474}, + name: "factor", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "factor", + pos: position{line: 30, col: 1, offset: 530}, + expr: &choiceExpr{ + pos: position{line: 30, col: 10, offset: 539}, + 
alternatives: []any{ + &actionExpr{ + pos: position{line: 30, col: 10, offset: 539}, + run: (*parser).callonfactor2, + expr: &seqExpr{ + pos: position{line: 30, col: 10, offset: 539}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 30, col: 10, offset: 539}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 30, col: 14, offset: 543}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 30, col: 14, offset: 543}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 30, col: 20, offset: 549}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 30, col: 25, offset: 554}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 30, col: 27, offset: 556}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 34, col: 5, offset: 666}, + run: (*parser).callonfactor10, + expr: &ruleRefExpr{ + pos: position{line: 34, col: 5, offset: 666}, + name: "atom", + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "atom", + pos: position{line: 38, col: 1, offset: 707}, + expr: &oneOrMoreExpr{ + pos: position{line: 38, col: 8, offset: 714}, + expr: &charClassMatcher{ + pos: position{line: 38, col: 8, offset: 714}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onstart1(a any) (any, error) { + return a, nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1(stack["a"]) +} + +func (c *current) onexpr2(a, op, b any) (any, error) { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil +} + +func (p *parser) callonexpr2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr2(stack["a"], stack["op"], stack["b"]) +} + +func (c 
*current) onexpr12(a any) (any, error) { + strA := a.(string) + return strA, nil +} + +func (p *parser) callonexpr12() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr12(stack["a"]) +} + +func (c *current) onterm2(a, op, b any) (any, error) { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil + +} + +func (p *parser) callonterm2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm2(stack["a"], stack["op"], stack["b"]) +} + +func (c *current) onterm13(a any) (any, error) { + strA := a.(string) + return strA, nil +} + +func (p *parser) callonterm13() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm13(stack["a"]) +} + +func (c *current) onfactor2(op, a any) (any, error) { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} + +func (p *parser) callonfactor2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor2(stack["op"], stack["a"]) +} + +func (c *current) onfactor10() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonfactor10() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor10() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. 
It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. 
+func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. 
This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... + +// nolint: structcheck +type grammar struct { + pos position + rules []*rule +} + +// nolint: structcheck +type rule struct { + pos position + name string + displayName string + expr any + + leader bool + leftRecursive bool +} + +// nolint: structcheck +type choiceExpr struct { + pos position + alternatives []any +} + +// nolint: structcheck +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +// nolint: structcheck +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +// nolint: structcheck +type seqExpr struct { + pos position + exprs []any +} + +// nolint: structcheck +type throwExpr struct { + pos position + label string +} + +// nolint: structcheck +type labeledExpr struct { + pos position + label string + expr any +} + +// nolint: structcheck +type expr struct { + pos position + expr any +} + +type ( + andExpr expr // nolint: structcheck + notExpr expr // nolint: structcheck + zeroOrOneExpr expr // nolint: structcheck + zeroOrMoreExpr expr // nolint: structcheck + oneOrMoreExpr expr // nolint: structcheck +) + +// nolint: structcheck +type ruleRefExpr struct { + pos position + name string +} + +// nolint: structcheck +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +// nolint: structcheck +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position 
// nolint: structcheck + +// errList cumulates the errors found by the parser. +type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = 
p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + endMark := p.pt + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, 
ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + if rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case 
*zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + + val = actVal + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + pt := p.pt + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, 
start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + return val, ok + } + } + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + pt := p.pt + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restore(pt) + + return nil, !ok +} + +func (p *parser) 
parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion/optimized/withoutleftrecursion/without_left_recursion.go 
b/test/left_recursion/optimized/withoutleftrecursion/without_left_recursion.go new file mode 100644 index 0000000..749d6f8 --- /dev/null +++ b/test/left_recursion/optimized/withoutleftrecursion/without_left_recursion.go @@ -0,0 +1,1295 @@ +// Code generated by pigeon; DO NOT EDIT. + +package withoutleftrecursion + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +func toAnySlice(v any) []any { + if v == nil { + return nil + } + return v.([]any) +} + +func exprToString(first string, rest any) string { + restSl := toAnySlice(rest) + l := first + for _, v := range restSl { + restExpr := toAnySlice(v) + r := restExpr[1].(string) + op := string(restExpr[0].([]byte)) + l = "(" + l + op + r + ")" + } + return l +} + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 24, col: 1, offset: 500}, + expr: &actionExpr{ + pos: position{line: 24, col: 9, offset: 508}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 24, col: 9, offset: 508}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 24, col: 9, offset: 508}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 24, col: 11, offset: 510}, + name: "expr", + }, + }, + ¬Expr{ + pos: position{line: 24, col: 16, offset: 515}, + expr: &anyMatcher{ + line: 24, col: 17, offset: 516, + }, + }, + }, + }, + }, + }, + { + name: "expr", + pos: position{line: 27, col: 1, offset: 537}, + expr: &actionExpr{ + pos: position{line: 27, col: 8, offset: 544}, + run: (*parser).callonexpr1, + expr: &seqExpr{ + pos: position{line: 27, col: 8, offset: 544}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 27, col: 8, offset: 544}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 27, col: 10, offset: 546}, + name: "term", + }, + }, + &labeledExpr{ + pos: position{line: 27, col: 15, offset: 551}, + label: "b", + expr: &zeroOrMoreExpr{ + pos: position{line: 27, col: 17, offset: 553}, + expr: &seqExpr{ 
+ pos: position{line: 27, col: 18, offset: 554}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 27, col: 20, offset: 556}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 27, col: 20, offset: 556}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 27, col: 26, offset: 562}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 27, col: 32, offset: 568}, + name: "term", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "term", + pos: position{line: 31, col: 1, offset: 641}, + expr: &actionExpr{ + pos: position{line: 31, col: 8, offset: 648}, + run: (*parser).callonterm1, + expr: &seqExpr{ + pos: position{line: 31, col: 8, offset: 648}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 31, col: 8, offset: 648}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 31, col: 10, offset: 650}, + name: "factor", + }, + }, + &labeledExpr{ + pos: position{line: 31, col: 17, offset: 657}, + label: "b", + expr: &zeroOrMoreExpr{ + pos: position{line: 31, col: 19, offset: 659}, + expr: &seqExpr{ + pos: position{line: 31, col: 21, offset: 661}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 31, col: 23, offset: 663}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 31, col: 23, offset: 663}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 31, col: 29, offset: 669}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 31, col: 35, offset: 675}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 31, col: 40, offset: 680}, + name: "factor", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "factor", + pos: position{line: 35, col: 1, offset: 755}, + expr: &choiceExpr{ + pos: position{line: 35, col: 10, offset: 764}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 35, col: 10, offset: 764}, + 
run: (*parser).callonfactor2, + expr: &seqExpr{ + pos: position{line: 35, col: 10, offset: 764}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 35, col: 10, offset: 764}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 35, col: 14, offset: 768}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 35, col: 14, offset: 768}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 35, col: 20, offset: 774}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 35, col: 25, offset: 779}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 35, col: 27, offset: 781}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 39, col: 5, offset: 891}, + run: (*parser).callonfactor10, + expr: &ruleRefExpr{ + pos: position{line: 39, col: 5, offset: 891}, + name: "atom", + }, + }, + }, + }, + }, + { + name: "atom", + pos: position{line: 42, col: 1, offset: 931}, + expr: &oneOrMoreExpr{ + pos: position{line: 42, col: 8, offset: 938}, + expr: &charClassMatcher{ + pos: position{line: 42, col: 8, offset: 938}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, +} + +func (c *current) onstart1(a any) (any, error) { + return a, nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1(stack["a"]) +} + +func (c *current) onexpr1(a, b any) (any, error) { + strA := a.(string) + return exprToString(strA, b), nil +} + +func (p *parser) callonexpr1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr1(stack["a"], stack["b"]) +} + +func (c *current) onterm1(a, b any) (any, error) { + strA := a.(string) + return exprToString(strA, b), nil +} + +func (p *parser) callonterm1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm1(stack["a"], stack["b"]) +} + 
+func (c *current) onfactor2(op, a any) (any, error) { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} + +func (p *parser) callonfactor2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor2(stack["op"], stack["a"]) +} + +func (c *current) onfactor10() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonfactor10() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor10() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. 
Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. 
+func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 
+ +// nolint: structcheck +type grammar struct { + pos position + rules []*rule +} + +// nolint: structcheck +type rule struct { + pos position + name string + displayName string + expr any +} + +// nolint: structcheck +type choiceExpr struct { + pos position + alternatives []any +} + +// nolint: structcheck +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +// nolint: structcheck +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +// nolint: structcheck +type seqExpr struct { + pos position + exprs []any +} + +// nolint: structcheck +type throwExpr struct { + pos position + label string +} + +// nolint: structcheck +type labeledExpr struct { + pos position + label string + expr any +} + +// nolint: structcheck +type expr struct { + pos position + expr any +} + +type ( + andExpr expr // nolint: structcheck + notExpr expr // nolint: structcheck + zeroOrOneExpr expr // nolint: structcheck + zeroOrMoreExpr expr // nolint: structcheck + oneOrMoreExpr expr // nolint: structcheck +) + +// nolint: structcheck +type ruleRefExpr struct { + pos position + name string +} + +// nolint: structcheck +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +// nolint: structcheck +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position // nolint: structcheck + +// errList cumulates the errors found by the parser. 
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = 
p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. + defer func() { + if e := recover(); e != nil { + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. 
+ maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + val, ok = p.parseRule(rule) + + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case 
*litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + + val = actVal + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + pt := p.pt + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in 
the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + return val, ok + } + } + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p 
*parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + pt := p.pt + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := 
p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion/standart/leftrecursion/left_recursion.go b/test/left_recursion/standart/leftrecursion/left_recursion.go new file mode 100644 index 0000000..d7f6020 --- /dev/null +++ b/test/left_recursion/standart/leftrecursion/left_recursion.go @@ -0,0 +1,1745 @@ +// Code generated by pigeon; DO NOT EDIT. + +package leftrecursion + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 5, col: 1, offset: 29}, + expr: &actionExpr{ + pos: position{line: 5, col: 9, offset: 37}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 5, col: 9, offset: 37}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 5, col: 9, offset: 37}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 5, col: 11, offset: 39}, + name: "expr", + }, + }, + &notExpr{ + pos: position{line: 5, col: 16, offset: 44}, + expr: &anyMatcher{ + line: 5, col: 17, offset: 45, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "expr", + pos: position{line: 9, col: 1, offset: 67}, + expr: &choiceExpr{ + pos: position{line: 9, col: 9, offset: 75}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 9, col: 9, offset: 75}, + run: (*parser).callonexpr2, + expr: &seqExpr{ + pos: position{line: 9, col: 9, offset: 75}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 9, col: 9, offset: 75}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 9, col: 11, offset: 77}, + name: "expr", + }, + }, + &labeledExpr{ + pos: position{line: 9, col: 16, offset: 82}, 
+ label: "op", + expr: &choiceExpr{ + pos: position{line: 9, col: 20, offset: 86}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 9, col: 20, offset: 86}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 9, col: 26, offset: 92}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 9, col: 31, offset: 97}, + label: "b", + expr: &ruleRefExpr{ + pos: position{line: 9, col: 33, offset: 99}, + name: "term", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 14, col: 5, offset: 237}, + run: (*parser).callonexpr12, + expr: &labeledExpr{ + pos: position{line: 14, col: 5, offset: 237}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 14, col: 7, offset: 239}, + name: "term", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "term", + pos: position{line: 19, col: 1, offset: 293}, + expr: &choiceExpr{ + pos: position{line: 19, col: 8, offset: 300}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 19, col: 8, offset: 300}, + run: (*parser).callonterm2, + expr: &seqExpr{ + pos: position{line: 19, col: 8, offset: 300}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 19, col: 8, offset: 300}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 19, col: 10, offset: 302}, + name: "term", + }, + }, + &labeledExpr{ + pos: position{line: 19, col: 15, offset: 307}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 19, col: 19, offset: 311}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 19, col: 19, offset: 311}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 19, col: 25, offset: 317}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 19, col: 31, offset: 323}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 19, col: 36, offset: 328}, + 
label: "b", + expr: &ruleRefExpr{ + pos: position{line: 19, col: 38, offset: 330}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 25, col: 5, offset: 472}, + run: (*parser).callonterm13, + expr: &labeledExpr{ + pos: position{line: 25, col: 5, offset: 472}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 25, col: 7, offset: 474}, + name: "factor", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "factor", + pos: position{line: 30, col: 1, offset: 530}, + expr: &choiceExpr{ + pos: position{line: 30, col: 10, offset: 539}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 30, col: 10, offset: 539}, + run: (*parser).callonfactor2, + expr: &seqExpr{ + pos: position{line: 30, col: 10, offset: 539}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 30, col: 10, offset: 539}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 30, col: 14, offset: 543}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 30, col: 14, offset: 543}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 30, col: 20, offset: 549}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 30, col: 25, offset: 554}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 30, col: 27, offset: 556}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 34, col: 5, offset: 666}, + run: (*parser).callonfactor10, + expr: &ruleRefExpr{ + pos: position{line: 34, col: 5, offset: 666}, + name: "atom", + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "atom", + pos: position{line: 38, col: 1, offset: 707}, + expr: &oneOrMoreExpr{ + pos: position{line: 38, col: 8, offset: 714}, + expr: &charClassMatcher{ + pos: position{line: 38, col: 8, offset: 714}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + leader: false, + 
leftRecursive: false, + }, + }, +} + +func (c *current) onstart1(a any) (any, error) { + return a, nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1(stack["a"]) +} + +func (c *current) onexpr2(a, op, b any) (any, error) { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil +} + +func (p *parser) callonexpr2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr2(stack["a"], stack["op"], stack["b"]) +} + +func (c *current) onexpr12(a any) (any, error) { + strA := a.(string) + return strA, nil +} + +func (p *parser) callonexpr12() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr12(stack["a"]) +} + +func (c *current) onterm2(a, op, b any) (any, error) { + strA := a.(string) + strB := b.(string) + strOp := string(op.([]byte)) + return "(" + strA + strOp + strB + ")", nil + +} + +func (p *parser) callonterm2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm2(stack["a"], stack["op"], stack["b"]) +} + +func (c *current) onterm13(a any) (any, error) { + strA := a.(string) + return strA, nil +} + +func (p *parser) callonterm13() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm13(stack["a"]) +} + +func (c *current) onfactor2(op, a any) (any, error) { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} + +func (p *parser) callonfactor2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor2(stack["op"], stack["a"]) +} + +func (c *current) onfactor10() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonfactor10() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor10() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. 
+ errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. 
+// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. 
When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. 
+type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 
+ +// nolint: structcheck +type grammar struct { + pos position + rules []*rule +} + +// nolint: structcheck +type rule struct { + pos position + name string + displayName string + expr any + + leader bool + leftRecursive bool +} + +// nolint: structcheck +type choiceExpr struct { + pos position + alternatives []any +} + +// nolint: structcheck +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +// nolint: structcheck +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +// nolint: structcheck +type seqExpr struct { + pos position + exprs []any +} + +// nolint: structcheck +type throwExpr struct { + pos position + label string +} + +// nolint: structcheck +type labeledExpr struct { + pos position + label string + expr any +} + +// nolint: structcheck +type expr struct { + pos position + expr any +} + +type ( + andExpr expr // nolint: structcheck + notExpr expr // nolint: structcheck + zeroOrOneExpr expr // nolint: structcheck + zeroOrMoreExpr expr // nolint: structcheck + oneOrMoreExpr expr // nolint: structcheck +) + +// nolint: structcheck +type ruleRefExpr struct { + pos position + name string +} + +// nolint: structcheck +type stateCodeExpr struct { + pos position + run func(*parser) error +} + +// nolint: structcheck +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +// nolint: structcheck +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position // nolint: structcheck + +// errList cumulates the errors found by the parser. 
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + if p.debug { + defer p.out(p.in("recursive " + rule.name)) + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + 
endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := 
p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func 
(p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range 
chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + 
p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := 
p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion/standart/withoutleftrecursion/without_left_recursion.go b/test/left_recursion/standart/withoutleftrecursion/without_left_recursion.go new file mode 100644 index 0000000..51bfe02 --- /dev/null +++ b/test/left_recursion/standart/withoutleftrecursion/without_left_recursion.go @@ -0,0 +1,1638 @@ +// Code generated by pigeon; DO NOT EDIT. 
+ +package withoutleftrecursion + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +func toAnySlice(v any) []any { + if v == nil { + return nil + } + return v.([]any) +} + +func exprToString(first string, rest any) string { + restSl := toAnySlice(rest) + l := first + for _, v := range restSl { + restExpr := toAnySlice(v) + r := restExpr[1].(string) + op := string(restExpr[0].([]byte)) + l = "(" + l + op + r + ")" + } + return l +} + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 24, col: 1, offset: 500}, + expr: &actionExpr{ + pos: position{line: 24, col: 9, offset: 508}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 24, col: 9, offset: 508}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 24, col: 9, offset: 508}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 24, col: 11, offset: 510}, + name: "expr", + }, + }, + ¬Expr{ + pos: position{line: 24, col: 16, offset: 515}, + expr: &anyMatcher{ + line: 24, col: 17, offset: 516, + }, + }, + }, + }, + }, + }, + { + name: "expr", + pos: position{line: 27, col: 1, offset: 537}, + expr: &actionExpr{ + pos: position{line: 27, col: 8, offset: 544}, + run: (*parser).callonexpr1, + expr: &seqExpr{ + pos: position{line: 27, col: 8, offset: 544}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 27, col: 8, offset: 544}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 27, col: 10, offset: 546}, + name: "term", + }, + }, + &labeledExpr{ + pos: position{line: 27, col: 15, offset: 551}, + label: "b", + expr: &zeroOrMoreExpr{ + pos: position{line: 27, col: 17, offset: 553}, + expr: &seqExpr{ + pos: position{line: 27, col: 18, offset: 554}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 27, col: 20, offset: 556}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 27, col: 20, offset: 556}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + 
&litMatcher{ + pos: position{line: 27, col: 26, offset: 562}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 27, col: 32, offset: 568}, + name: "term", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "term", + pos: position{line: 31, col: 1, offset: 641}, + expr: &actionExpr{ + pos: position{line: 31, col: 8, offset: 648}, + run: (*parser).callonterm1, + expr: &seqExpr{ + pos: position{line: 31, col: 8, offset: 648}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 31, col: 8, offset: 648}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 31, col: 10, offset: 650}, + name: "factor", + }, + }, + &labeledExpr{ + pos: position{line: 31, col: 17, offset: 657}, + label: "b", + expr: &zeroOrMoreExpr{ + pos: position{line: 31, col: 19, offset: 659}, + expr: &seqExpr{ + pos: position{line: 31, col: 21, offset: 661}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 31, col: 23, offset: 663}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 31, col: 23, offset: 663}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 31, col: 29, offset: 669}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 31, col: 35, offset: 675}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 31, col: 40, offset: 680}, + name: "factor", + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "factor", + pos: position{line: 35, col: 1, offset: 755}, + expr: &choiceExpr{ + pos: position{line: 35, col: 10, offset: 764}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 35, col: 10, offset: 764}, + run: (*parser).callonfactor2, + expr: &seqExpr{ + pos: position{line: 35, col: 10, offset: 764}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 35, col: 10, offset: 764}, + label: "op", + expr: &choiceExpr{ + pos: position{line: 35, col: 14, offset: 768}, + 
alternatives: []any{ + &litMatcher{ + pos: position{line: 35, col: 14, offset: 768}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 35, col: 20, offset: 774}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + }, + &labeledExpr{ + pos: position{line: 35, col: 25, offset: 779}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 35, col: 27, offset: 781}, + name: "factor", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 39, col: 5, offset: 891}, + run: (*parser).callonfactor10, + expr: &ruleRefExpr{ + pos: position{line: 39, col: 5, offset: 891}, + name: "atom", + }, + }, + }, + }, + }, + { + name: "atom", + pos: position{line: 42, col: 1, offset: 931}, + expr: &oneOrMoreExpr{ + pos: position{line: 42, col: 8, offset: 938}, + expr: &charClassMatcher{ + pos: position{line: 42, col: 8, offset: 938}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, +} + +func (c *current) onstart1(a any) (any, error) { + return a, nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1(stack["a"]) +} + +func (c *current) onexpr1(a, b any) (any, error) { + strA := a.(string) + return exprToString(strA, b), nil +} + +func (p *parser) callonexpr1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr1(stack["a"], stack["b"]) +} + +func (c *current) onterm1(a, b any) (any, error) { + strA := a.(string) + return exprToString(strA, b), nil +} + +func (p *parser) callonterm1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm1(stack["a"], stack["b"]) +} + +func (c *current) onfactor2(op, a any) (any, error) { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} + +func (p *parser) callonfactor2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return 
p.cur.onfactor2(stack["op"], stack["a"]) +} + +func (c *current) onfactor10() (any, error) { + return string(c.text), nil +} + +func (p *parser) callonfactor10() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor10() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. 
+func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. +// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. 
+// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. 
+func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 
+ +// nolint: structcheck +type grammar struct { + pos position + rules []*rule +} + +// nolint: structcheck +type rule struct { + pos position + name string + displayName string + expr any +} + +// nolint: structcheck +type choiceExpr struct { + pos position + alternatives []any +} + +// nolint: structcheck +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +// nolint: structcheck +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +// nolint: structcheck +type seqExpr struct { + pos position + exprs []any +} + +// nolint: structcheck +type throwExpr struct { + pos position + label string +} + +// nolint: structcheck +type labeledExpr struct { + pos position + label string + expr any +} + +// nolint: structcheck +type expr struct { + pos position + expr any +} + +type ( + andExpr expr // nolint: structcheck + notExpr expr // nolint: structcheck + zeroOrOneExpr expr // nolint: structcheck + zeroOrMoreExpr expr // nolint: structcheck + oneOrMoreExpr expr // nolint: structcheck +) + +// nolint: structcheck +type ruleRefExpr struct { + pos position + name string +} + +// nolint: structcheck +type stateCodeExpr struct { + pos position + run func(*parser) error +} + +// nolint: structcheck +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +// nolint: structcheck +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position // nolint: structcheck + +// errList cumulates the errors found by the parser. 
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how many times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once.
+ ChoiceAltCnt map[string]map[string]int +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should be applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error.
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if 
p.memoize { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + if p.memoize { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + 
default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn 
:= range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, 
bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := 
p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} 
diff --git a/test/left_recursion/without_left_recursion.peg b/test/left_recursion/without_left_recursion.peg new file mode 100644 index 0000000..8758cda --- /dev/null +++ b/test/left_recursion/without_left_recursion.peg @@ -0,0 +1,42 @@ +{ + package withoutleftrecursion + + func toAnySlice(v any) []any { + if v == nil { + return nil + } + return v.([]any) + } + + func exprToString(first string, rest any) string { + restSl := toAnySlice(rest) + l := first + for _, v := range restSl { + restExpr := toAnySlice(v) + r := restExpr[1].(string) + op := string(restExpr[0].([]byte)) + l = "(" + l + op + r + ")" + } + return l + } +} + +start = a:expr !. { + return a, nil +} +expr = a:term b:(( '+' / '-' ) term )* { + strA := a.(string) + return exprToString(strA, b), nil +} +term = a:factor b:( ( '*' / '/' / '%') factor )* { + strA := a.(string) + return exprToString(strA, b), nil +} +factor = op:('+' / '-') a:factor { + strA := a.(string) + strOp := string(op.([]byte)) + return "(" + strOp + strA + ")", nil +} / atom { + return string(c.text), nil +} +atom = [0-9]+ diff --git a/test/left_recursion_labeled_failures/errors.go b/test/left_recursion_labeled_failures/errors.go new file mode 100644 index 0000000..f3dca03 --- /dev/null +++ b/test/left_recursion_labeled_failures/errors.go @@ -0,0 +1,31 @@ +package leftrecursionlabeledfailures + +// ErrorLister is the public interface to access the inner errors +// included in an errList. +type ErrorLister interface { + Errors() []error +} + +func (e errList) Errors() []error { + return e +} + +// ParserError is the public interface to errors of type parserError.
+type ParserError interface { + Error() string + InnerError() error + Pos() (int, int, int) + Expected() []string +} + +func (p *parserError) InnerError() error { + return p.Inner +} + +func (p *parserError) Pos() (line, col, offset int) { + return p.pos.line, p.pos.col, p.pos.offset +} + +func (p *parserError) Expected() []string { + return p.expected +} diff --git a/test/left_recursion_labeled_failures/left_recursion_labeled_failures.go b/test/left_recursion_labeled_failures/left_recursion_labeled_failures.go new file mode 100644 index 0000000..80e6290 --- /dev/null +++ b/test/left_recursion_labeled_failures/left_recursion_labeled_failures.go @@ -0,0 +1,1755 @@ +// Code generated by pigeon; DO NOT EDIT. + +package leftrecursionlabeledfailures + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +func ids(list, id any) (any, error) { + l := toStringSlice(list) + l = append(l, id.(string)) + return l, nil +} + +func toStringSlice(v any) []string { + if v == nil { + return nil + } + return v.([]string) +} + +var g = &grammar{ + rules: []*rule{ + { + name: "S", + pos: position{line: 18, col: 1, offset: 244}, + expr: &recoveryExpr{ + pos: position{line: 18, col: 5, offset: 250}, + expr: &recoveryExpr{ + pos: position{line: 18, col: 5, offset: 250}, + expr: &actionExpr{ + pos: position{line: 18, col: 5, offset: 250}, + run: (*parser).callonS3, + expr: &labeledExpr{ + pos: position{line: 18, col: 5, offset: 250}, + label: "list", + expr: &ruleRefExpr{ + pos: position{line: 18, col: 10, offset: 255}, + name: "List", + }, + }, + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 20, col: 16, offset: 309}, + name: "ErrComma", + }, + failureLabel: []string{ + "errComma", + }, + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 20, col: 35, offset: 328}, + name: "ErrID", + }, + failureLabel: []string{ + "errId", + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: 
"List", + pos: position{line: 22, col: 1, offset: 335}, + expr: &choiceExpr{ + pos: position{line: 22, col: 8, offset: 344}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 22, col: 8, offset: 344}, + run: (*parser).callonList2, + expr: &seqExpr{ + pos: position{line: 22, col: 9, offset: 345}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 22, col: 9, offset: 345}, + label: "list", + expr: &ruleRefExpr{ + pos: position{line: 22, col: 14, offset: 350}, + name: "List", + }, + }, + &ruleRefExpr{ + pos: position{line: 22, col: 19, offset: 355}, + name: "Comma", + }, + &labeledExpr{ + pos: position{line: 22, col: 25, offset: 361}, + label: "id", + expr: &ruleRefExpr{ + pos: position{line: 22, col: 28, offset: 364}, + name: "ID", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 24, col: 5, offset: 399}, + run: (*parser).callonList9, + expr: &labeledExpr{ + pos: position{line: 24, col: 5, offset: 399}, + label: "id", + expr: &ruleRefExpr{ + pos: position{line: 24, col: 8, offset: 402}, + name: "ID", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "ID", + pos: position{line: 28, col: 1, offset: 448}, + expr: &choiceExpr{ + pos: position{line: 28, col: 6, offset: 455}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 28, col: 6, offset: 455}, + run: (*parser).callonID2, + expr: &seqExpr{ + pos: position{line: 28, col: 6, offset: 455}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 28, col: 6, offset: 455}, + name: "Sp", + }, + &oneOrMoreExpr{ + pos: position{line: 28, col: 9, offset: 458}, + expr: &charClassMatcher{ + pos: position{line: 28, col: 9, offset: 458}, + val: "[a-z]", + ranges: []rune{'a', 'z'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + }, + }, + &throwExpr{ + pos: position{line: 30, col: 5, offset: 531}, + label: "errId", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "Comma", + pos: position{line: 32, col: 1, offset: 541}, + expr: 
&choiceExpr{ + pos: position{line: 32, col: 9, offset: 551}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 32, col: 9, offset: 551}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 32, col: 9, offset: 551}, + name: "Sp", + }, + &litMatcher{ + pos: position{line: 32, col: 12, offset: 554}, + val: ",", + ignoreCase: false, + want: "\",\"", + }, + }, + }, + &throwExpr{ + pos: position{line: 32, col: 18, offset: 560}, + label: "errComma", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "Sp", + pos: position{line: 33, col: 1, offset: 572}, + expr: &zeroOrMoreExpr{ + pos: position{line: 33, col: 6, offset: 579}, + expr: &charClassMatcher{ + pos: position{line: 33, col: 6, offset: 579}, + val: "[ \\t\\r\\n]", + chars: []rune{' ', '\t', '\r', '\n'}, + ignoreCase: false, + inverted: false, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrComma", + pos: position{line: 35, col: 1, offset: 591}, + expr: &seqExpr{ + pos: position{line: 35, col: 12, offset: 604}, + exprs: []any{ + &stateCodeExpr{ + pos: position{line: 35, col: 12, offset: 604}, + run: (*parser).callonErrComma2, + }, + &zeroOrMoreExpr{ + pos: position{line: 37, col: 7, offset: 656}, + expr: &seqExpr{ + pos: position{line: 37, col: 9, offset: 658}, + exprs: []any{ + ¬Expr{ + pos: position{line: 37, col: 9, offset: 658}, + expr: &oneOrMoreExpr{ + pos: position{line: 37, col: 11, offset: 660}, + expr: &charClassMatcher{ + pos: position{line: 37, col: 11, offset: 660}, + val: "[a-z]", + ranges: []rune{'a', 'z'}, + ignoreCase: false, + inverted: false, + }, + }, + }, + &anyMatcher{ + line: 37, col: 19, offset: 668, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrID", + pos: position{line: 38, col: 1, offset: 672}, + expr: &actionExpr{ + pos: position{line: 38, col: 9, offset: 682}, + run: (*parser).callonErrID1, + expr: &seqExpr{ + pos: position{line: 38, col: 9, offset: 682}, + exprs: []any{ + &stateCodeExpr{ 
+ pos: position{line: 38, col: 9, offset: 682}, + run: (*parser).callonErrID3, + }, + &zeroOrMoreExpr{ + pos: position{line: 40, col: 7, offset: 744}, + expr: &seqExpr{ + pos: position{line: 40, col: 9, offset: 746}, + exprs: []any{ + ¬Expr{ + pos: position{line: 40, col: 9, offset: 746}, + expr: &litMatcher{ + pos: position{line: 40, col: 11, offset: 748}, + val: ",", + ignoreCase: false, + want: "\",\"", + }, + }, + &anyMatcher{ + line: 40, col: 16, offset: 753, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onS3(list any) (any, error) { + return list.([]string), nil +} + +func (p *parser) callonS3() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onS3(stack["list"]) +} + +func (c *current) onList2(list, id any) (any, error) { + return ids(list, id) +} + +func (p *parser) callonList2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onList2(stack["list"], stack["id"]) +} + +func (c *current) onList9(id any) (any, error) { + return []string{id.(string)}, nil +} + +func (p *parser) callonList9() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onList9(stack["id"]) +} + +func (c *current) onID2() (any, error) { + return strings.TrimLeft(string(c.text), " \t\r\n"), nil +} + +func (p *parser) callonID2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onID2() +} + +func (c *current) onErrComma2() error { + return errors.New("expecting ','") + +} + +func (p *parser) callonErrComma2() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrComma2() +} + +func (c *current) onErrID3() error { + return errors.New("expecting an identifier") + +} + +func (p *parser) callonErrID3() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrID3() +} + +func (c *current) onErrID1() (any, error) { + return "NONE", nil +} + +func (p *parser) callonErrID1() 
(any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrID1() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. 
+func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. +// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. 
+// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. 
+func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. +type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... 
// grammar is the root of the generated parse tables: the ordered list of
// rules. Unless another entrypoint is selected, parsing starts at rules[0].
// nolint: structcheck
type grammar struct {
	pos   position
	rules []*rule
}

// rule is one named grammar rule.
//
// displayName, when non-empty, is used instead of name in error message
// prefixes. leader and leftRecursive select how the rule is evaluated:
// a leader rule is parsed with the iterative left-recursion loop, and
// expressions inside a leftRecursive rule bypass expression memoization.
// nolint: structcheck
type rule struct {
	pos         position
	name        string
	displayName string
	expr        any

	leader        bool
	leftRecursive bool
}

// choiceExpr is an ordered choice: alternatives are tried in order and the
// first one that matches provides the result.
// nolint: structcheck
type choiceExpr struct {
	pos          position
	alternatives []any
}

// actionExpr runs user code when expr matches; the value returned by run
// becomes the value of the expression, and an error returned by run is
// recorded at the start position of the match.
// nolint: structcheck
type actionExpr struct {
	pos  position
	expr any
	run  func(*parser) (any, error)
}

// recoveryExpr registers recoverExpr as the recovery expression for every
// label in failureLabel while expr is being parsed.
// NOTE(review): the lookup performed when a label is thrown is not part of
// this section — confirm against parseThrowExpr.
// nolint: structcheck
type recoveryExpr struct {
	pos          position
	expr         any
	recoverExpr  any
	failureLabel []string
}

// seqExpr is a sequence of sub-expressions.
// nolint: structcheck
type seqExpr struct {
	pos   position
	exprs []any
}

// throwExpr names a failure label to throw (presumably resolved against the
// recovery expressions registered by recoveryExpr — verify in
// parseThrowExpr).
// nolint: structcheck
type throwExpr struct {
	pos   position
	label string
}

// labeledExpr binds the value matched by expr to label, making it available
// to action code through the variable stack. An empty label stores nothing.
// nolint: structcheck
type labeledExpr struct {
	pos   position
	label string
	expr  any
}

// expr is the common layout of the single-operand operators declared below.
// nolint: structcheck
type expr struct {
	pos  position
	expr any
}

// Single-operand operators. andExpr and notExpr are look-ahead predicates:
// they evaluate their operand and then restore the parser position and
// state, with notExpr inverting the result. oneOrMoreExpr repeats its
// operand and fails if it never matches.
type (
	andExpr        expr // nolint: structcheck
	notExpr        expr // nolint: structcheck
	zeroOrOneExpr  expr // nolint: structcheck
	zeroOrMoreExpr expr // nolint: structcheck
	oneOrMoreExpr  expr // nolint: structcheck
)

// ruleRefExpr is a reference to another rule, resolved by name in the
// parser's rules table at parse time.
// nolint: structcheck
type ruleRefExpr struct {
	pos  position
	name string
}

// stateCodeExpr is a user code block that only reports an error.
// NOTE(review): by its name it is meant to update the parser state store —
// confirm against parseStateCodeExpr.
// nolint: structcheck
type stateCodeExpr struct {
	pos position
	run func(*parser) error
}

// andCodeExpr is a code predicate: it matches when run returns true. An
// error returned by run is recorded, and state changes made by run are
// rolled back.
// nolint: structcheck
type andCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// notCodeExpr is the negated code predicate: it matches when run returns
// false. An error returned by run is recorded, and state changes made by
// run are rolled back.
// nolint: structcheck
type notCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// litMatcher matches the literal val rune by rune. With ignoreCase set the
// input rune is lower-cased before the comparison (so val is presumably
// stored lower-cased — verify against the generator). want is the text
// reported in "expected" lists.
// nolint: structcheck
type litMatcher struct {
	pos        position
	val        string
	ignoreCase bool
	want       string
}

// charClassMatcher matches one rune against a character class.
//
// chars lists individual runes, ranges holds inclusive [lo, hi] bounds as
// consecutive pairs, and classes holds Unicode range tables. With
// ignoreCase set the input rune is lower-cased first; inverted flips the
// result. val is the class source text reported in "expected" lists.
// NOTE(review): basicLatinChars is not read by the matcher in this
// section — confirm whether another code path uses it.
// nolint: structcheck
type charClassMatcher struct {
	pos             position
	val             string
	basicLatinChars [128]bool
	chars           []rune
	ranges          []rune
	classes         []*unicode.RangeTable
	ignoreCase      bool
	inverted        bool
}

// anyMatcher matches any single rune and fails only at end of input; the
// value itself is just the position of the matcher in the grammar.
type anyMatcher position // nolint: structcheck

// errList cumulates the errors found by the parser.
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + if p.debug { + defer p.out(p.in("recursive " + rule.name)) + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + 
endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := 
p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func 
(p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range 
chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + 
p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := 
p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion_labeled_failures/left_recursion_labeled_failures.peg b/test/left_recursion_labeled_failures/left_recursion_labeled_failures.peg new file mode 100644 index 0000000..dc43fda --- /dev/null +++ b/test/left_recursion_labeled_failures/left_recursion_labeled_failures.peg @@ -0,0 +1,40 @@ +{ +package leftrecursionlabeledfailures + +func ids(list, id any) (any, error) { + l := toStringSlice(list) + l = append(l, id.(string)) + return l, nil +} + +func toStringSlice(v any) []string { + if v == nil { + return nil + } + return v.([]string) +} +} + +S ← list:List { + return list.([]string), nil +} 
//{errComma} ErrComma //{errId} ErrID + +List ← (list:List Comma id:ID) { + return ids(list, id) +} / id:ID { + return []string{id.(string)}, nil +} + +ID ← Sp [a-z]+ { + return strings.TrimLeft(string(c.text), " \t\r\n"), nil +} / %{errId} + +Comma ← Sp ',' / %{errComma} +Sp ← [ \t\r\n]* + +ErrComma ← #{ + return errors.New("expecting ','") + } ( !([a-z]+) .)* +ErrID ← #{ + return errors.New("expecting an identifier") + } ( !(',') .)* { return "NONE", nil } diff --git a/test/left_recursion_labeled_failures/left_recursion_labeled_failures_test.go b/test/left_recursion_labeled_failures/left_recursion_labeled_failures_test.go new file mode 100644 index 0000000..259c57e --- /dev/null +++ b/test/left_recursion_labeled_failures/left_recursion_labeled_failures_test.go @@ -0,0 +1,165 @@ +package leftrecursionlabeledfailures_test + +import ( + "errors" + "reflect" + "testing" + + leftrecursionlabeledfailures "github.com/mna/pigeon/test/left_recursion_labeled_failures" +) + +func TestLeftRecursionWithLabeledFailures(t *testing.T) { + t.Parallel() + + type want struct { + captures []string + errors []string + } + + cases := []struct { + name string + input string + want want + }{ + // Test cases from reference implementation peglabel: + // https://github.com/sqmedeiros/lpeglabel/blob/976b38458e0bba58ca748e96b53afd9ee74a1d1d/README.md#relabel-syntax + // https://github.com/sqmedeiros/lpeglabel/blame/976b38458e0bba58ca748e96b53afd9ee74a1d1d/README.md#L418-L440 + { + name: "correct", + input: "one,two", + want: want{captures: []string{"one", "two"}}, + }, + { + name: "missing commas", + input: "one two three", + want: want{ + captures: []string{"one", "two", "three"}, + errors: []string{ + "1:4 (3): rule ErrComma: expecting ','", + "1:8 (7): rule ErrComma: expecting ','", + }, + }, + }, + { + name: "missing id and incorrect ids", + input: "1,\n two, \n3,", + want: want{ + captures: []string{"NONE", "two", "NONE", "NONE"}, + errors: []string{ + "1:1 (0): rule ErrID: expecting an 
identifier", + "2:6 (8): rule ErrID: expecting an identifier", + // is line 3, col 2 in peglabel, pigeon increments the position + // behind the last character of the input if !. is matched + "3:3 (12): rule ErrID: expecting an identifier", + }, + }, + }, + { + name: "missing comma, id and incorrect id", + input: "one\n two123, \nthree,", + want: want{ + captures: []string{"one", "two", "three", "NONE"}, + errors: []string{ + // is line 2, col 1 in peglabel, in pigeon, if a \n causes + // an error, this is at col 0 + "2:0 (3): rule ErrComma: expecting ','", + "2:5 (8): rule ErrComma: expecting ','", + // is line 3, col 6 in peglabel, pigeon increments the position + // behind the last character of the input if !. is matched + "3:7 (20): rule ErrID: expecting an identifier", + }, + }, + }, + // Additional test cases + { + name: "empty", + input: "", + want: want{ + captures: []string{"NONE"}, + errors: []string{ + "1:1 (0): rule ErrID: expecting an identifier", + }, + }, + }, + { + name: "incorrect id", + input: "1", + want: want{ + captures: []string{"NONE"}, + errors: []string{"1:1 (0): rule ErrID: expecting an identifier"}, + }, + }, + { + name: "incorrect ids", + input: "1,2", + want: want{ + captures: []string{"NONE", "NONE"}, + errors: []string{ + "1:1 (0): rule ErrID: expecting an identifier", + "1:3 (2): rule ErrID: expecting an identifier", + }, + }, + }, + } + for _, testCase := range cases { + testCase := testCase + + setOptions := map[string][]leftrecursionlabeledfailures.Option{ + "memoize": {leftrecursionlabeledfailures.Memoize(true)}, + "-": {}, + } + for nameOptions, options := range setOptions { + options := options + + t.Run(testCase.name+". Options: "+nameOptions, func(t *testing.T) { + t.Parallel() + + got, err := leftrecursionlabeledfailures.Parse( + "", []byte(testCase.input), options...) 
+ if testCase.want.errors == nil && err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.input, err) + } + if !reflect.DeepEqual(got, testCase.want.captures) { + t.Errorf( + "for input %q want %s, got %s", + testCase.input, testCase.want.captures, got) + } + if err != nil { + var errorLister leftrecursionlabeledfailures.ErrorLister + if !errors.As(err, &errorLister) { + t.FailNow() + } + list := errorLister.Errors() + if len(list) != len(testCase.want.errors) { + t.Errorf( + "for input %q want %d error(s), got %d", + testCase.input, len(testCase.want.errors), len(list)) + t.Logf("expected errors:\n") + for _, ee := range testCase.want.errors { + t.Logf("- %s\n", ee) + } + t.Logf("got errors:\n") + for _, ee := range list { + t.Logf("- %s\n", ee) + } + t.FailNow() + } + for index, err := range list { + var parserError leftrecursionlabeledfailures.ParserError + if !errors.As(err, &parserError) { + t.FailNow() + } + if parserError.Error() != testCase.want.errors[index] { + t.Errorf( + "for input %q want %dth error to be %s, got %s", + testCase.input, index+1, + testCase.want.errors[index], parserError) + } + } + } + }) + } + } +} diff --git a/test/left_recursion_state/left_recursion_state.peg b/test/left_recursion_state/left_recursion_state.peg new file mode 100644 index 0000000..5eedd8e --- /dev/null +++ b/test/left_recursion_state/left_recursion_state.peg @@ -0,0 +1,41 @@ +{ + package leftrecursionstate +} + +start = #{ + if _, ok := c.state["count"]; !ok { + c.state["count"] = 0 + } + return nil +} (a:expr)? 
{ + return c.state["count"], nil +} + +expr = (expr ('+' / '-') term) #{ + c.state["count"] = c.state["count"].(int) + 1; + return nil +} / term #{ + c.state["count"] = c.state["count"].(int) + 3; + return nil +} + +term = (term ('*' / '/' / '%') factor) #{ + c.state["count"] = c.state["count"].(int) + 7; + return nil +} / factor #{ + c.state["count"] = c.state["count"].(int) + 15; + return nil +} + +factor = (('+' / '-') factor) #{ + c.state["count"] = c.state["count"].(int) + 31; + return nil +} / atom #{ + c.state["count"] = c.state["count"].(int) + 63; + return nil +} + +atom = ([0-9]+) #{ + c.state["count"] = c.state["count"].(int) + 127; + return nil +} diff --git a/test/left_recursion_state/left_recursion_state_test.go b/test/left_recursion_state/left_recursion_state_test.go new file mode 100644 index 0000000..f2149fe --- /dev/null +++ b/test/left_recursion_state/left_recursion_state_test.go @@ -0,0 +1,115 @@ +package leftrecursionstate_test + +import ( + "testing" + + optimizedleftrecursionstate "github.com/mna/pigeon/test/left_recursion_state/optimized" + leftrecursionstate "github.com/mna/pigeon/test/left_recursion_state/standart" +) + +func TestLeftRecursionWithState(t *testing.T) { + t.Parallel() + + initCount := 100000 + + type want struct { + count int + } + + tests := []struct { + name string + expr string + want want + }{ + { + name: "atom", + expr: "1", + want: want{count: 3 + 15 + 63 + 127 + initCount}, + }, + { + name: "factor", + expr: "-1", + want: want{count: 3 + 15 + 31 + 63 + 127 + initCount}, + }, + { + name: "expr", + expr: "1+1", + want: want{count: 1 + + (3 + 15 + 63 + 127) + + (15 + 63 + 127) + initCount}, + }, + { + name: "expr", + expr: "1*1*1", + want: want{count: 3 + + 7 + + 7 + + (15 + 63 + 127) + + (63 + 127) + + (63 + 127) + + +initCount}, + }, + { + name: "invalid", + expr: "**", + want: want{count: initCount}, + }, + } + + for _, testCase := range tests { + testCase := testCase + + setOptions := 
map[string][]leftrecursionstate.Option{ + "memoize": { + leftrecursionstate.Memoize(true), + leftrecursionstate.InitState("count", initCount), + }, + "-": { + leftrecursionstate.InitState("count", initCount), + }, + } + for nameOptions, options := range setOptions { + options := options + + t.Run(testCase.name+" default. Options: "+nameOptions, func(t *testing.T) { + t.Parallel() + + count, err := leftrecursionstate.Parse( + "", []byte(testCase.expr), options...) + if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + if count != testCase.want.count { + t.Fatalf( + "for input %q\ngot result: %d,\nbut expect: %d", + testCase.expr, count, testCase.want.count) + } + }) + } + + t.Run(testCase.name+" optimized", func(t *testing.T) { + t.Parallel() + + count, err := optimizedleftrecursionstate.Parse( + "", []byte(testCase.expr), + optimizedleftrecursionstate.InitState("count", initCount)) + if err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.expr, err) + } + if count != testCase.want.count { + t.Fatalf( + "for input %q\ngot result: %q,\nbut expect: %q", + testCase.expr, count, testCase.want.count) + } + if count != testCase.want.count { + t.Fatalf( + "for input %q\ngot result: %d,\nbut expect: %d", + testCase.expr, count, testCase.want.count) + } + }) + } +} diff --git a/test/left_recursion_state/optimized/left_recursion_state.go b/test/left_recursion_state/optimized/left_recursion_state.go new file mode 100644 index 0000000..e6a9787 --- /dev/null +++ b/test/left_recursion_state/optimized/left_recursion_state.go @@ -0,0 +1,1533 @@ +// Code generated by pigeon; DO NOT EDIT. 
+ +package leftrecursionstate + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 5, col: 1, offset: 34}, + expr: &actionExpr{ + pos: position{line: 5, col: 9, offset: 42}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 5, col: 9, offset: 42}, + exprs: []any{ + &stateCodeExpr{ + pos: position{line: 5, col: 9, offset: 42}, + run: (*parser).callonstart3, + }, + &zeroOrOneExpr{ + pos: position{line: 10, col: 3, offset: 128}, + expr: &labeledExpr{ + pos: position{line: 10, col: 4, offset: 129}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 10, col: 6, offset: 131}, + name: "expr", + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "expr", + pos: position{line: 14, col: 1, offset: 173}, + expr: &choiceExpr{ + pos: position{line: 14, col: 9, offset: 181}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 14, col: 9, offset: 181}, + exprs: []any{ + &seqExpr{ + pos: position{line: 14, col: 10, offset: 182}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 14, col: 10, offset: 182}, + name: "expr", + }, + &choiceExpr{ + pos: position{line: 14, col: 16, offset: 188}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 14, col: 16, offset: 188}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 14, col: 22, offset: 194}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 14, col: 27, offset: 199}, + name: "term", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 14, col: 33, offset: 205}, + run: (*parser).callonexpr9, + }, + }, + }, + &seqExpr{ + pos: position{line: 17, col: 5, offset: 278}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 17, col: 5, offset: 278}, + name: "term", + }, + &stateCodeExpr{ + pos: 
position{line: 17, col: 10, offset: 283}, + run: (*parser).callonexpr12, + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "term", + pos: position{line: 22, col: 1, offset: 355}, + expr: &choiceExpr{ + pos: position{line: 22, col: 8, offset: 362}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 22, col: 8, offset: 362}, + exprs: []any{ + &seqExpr{ + pos: position{line: 22, col: 9, offset: 363}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 22, col: 9, offset: 363}, + name: "term", + }, + &choiceExpr{ + pos: position{line: 22, col: 15, offset: 369}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 22, col: 15, offset: 369}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 22, col: 21, offset: 375}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 22, col: 27, offset: 381}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 22, col: 32, offset: 386}, + name: "factor", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 22, col: 40, offset: 394}, + run: (*parser).callonterm10, + }, + }, + }, + &seqExpr{ + pos: position{line: 25, col: 5, offset: 467}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 25, col: 5, offset: 467}, + name: "factor", + }, + &stateCodeExpr{ + pos: position{line: 25, col: 12, offset: 474}, + run: (*parser).callonterm13, + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "factor", + pos: position{line: 30, col: 1, offset: 547}, + expr: &choiceExpr{ + pos: position{line: 30, col: 10, offset: 556}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 30, col: 10, offset: 556}, + exprs: []any{ + &seqExpr{ + pos: position{line: 30, col: 11, offset: 557}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 30, col: 12, offset: 558}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 30, col: 12, offset: 
558}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 30, col: 18, offset: 564}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 30, col: 23, offset: 569}, + name: "factor", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 30, col: 31, offset: 577}, + run: (*parser).callonfactor8, + }, + }, + }, + &seqExpr{ + pos: position{line: 33, col: 5, offset: 651}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 33, col: 5, offset: 651}, + name: "atom", + }, + &stateCodeExpr{ + pos: position{line: 33, col: 10, offset: 656}, + run: (*parser).callonfactor11, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "atom", + pos: position{line: 38, col: 1, offset: 729}, + expr: &seqExpr{ + pos: position{line: 38, col: 8, offset: 736}, + exprs: []any{ + &oneOrMoreExpr{ + pos: position{line: 38, col: 9, offset: 737}, + expr: &charClassMatcher{ + pos: position{line: 38, col: 9, offset: 737}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &stateCodeExpr{ + pos: position{line: 38, col: 17, offset: 745}, + run: (*parser).callonatom4, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onstart3() error { + + if _, ok := c.state["count"]; !ok { + c.state["count"] = 0 + } + return nil +} + +func (p *parser) callonstart3() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart3() +} + +func (c *current) onstart1() (any, error) { + return c.state["count"], nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1() +} + +func (c *current) onexpr9() error { + c.state["count"] = c.state["count"].(int) + 1 + return nil +} + +func (p *parser) callonexpr9() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr9() +} + +func (c *current) onexpr12() error { 
+ c.state["count"] = c.state["count"].(int) + 3 + return nil +} + +func (p *parser) callonexpr12() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr12() +} + +func (c *current) onterm10() error { + c.state["count"] = c.state["count"].(int) + 7 + return nil +} + +func (p *parser) callonterm10() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm10() +} + +func (c *current) onterm13() error { + c.state["count"] = c.state["count"].(int) + 15 + return nil +} + +func (p *parser) callonterm13() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm13() +} + +func (c *current) onfactor8() error { + c.state["count"] = c.state["count"].(int) + 31 + return nil +} + +func (p *parser) callonfactor8() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor8() +} + +func (c *current) onfactor11() error { + c.state["count"] = c.state["count"].(int) + 63 + return nil +} + +func (p *parser) callonfactor11() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor11() +} + +func (c *current) onatom4() error { + c.state["count"] = c.state["count"].(int) + 127 + return nil +} + +func (p *parser) callonatom4() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onatom4() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. + errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. 
It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. 
+func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. +type position struct { + line, col, offset int +} + +func (p position) String() string { + return strconv.Itoa(p.line) + ":" + strconv.Itoa(p.col) + " [" + strconv.Itoa(p.offset) + "]" +} + +// savepoint stores all state required to go back to this point in the +// parser. 
+type savepoint struct { + position + rn rune + w int +} + +type current struct { + pos position // start position of the match + text []byte // raw text of the match + + // state is a store for arbitrary key,value pairs that the user wants to be + // tied to the backtracking of the parser. + // This is always rolled back if a parsing rule fails. + state storeDict + + // globalStore is a general store for the user to store arbitrary key-value + // pairs that they need to manage and that they do not want tied to the + // backtracking of the parser. This is only modified by the user and never + // rolled back by the parser. It is always up to the user to keep this in a + // consistent state. + globalStore storeDict +} + +type storeDict map[string]any + +// the AST types... + +// nolint: structcheck +type grammar struct { + pos position + rules []*rule +} + +// nolint: structcheck +type rule struct { + pos position + name string + displayName string + expr any + + leader bool + leftRecursive bool +} + +// nolint: structcheck +type choiceExpr struct { + pos position + alternatives []any +} + +// nolint: structcheck +type actionExpr struct { + pos position + expr any + run func(*parser) (any, error) +} + +// nolint: structcheck +type recoveryExpr struct { + pos position + expr any + recoverExpr any + failureLabel []string +} + +// nolint: structcheck +type seqExpr struct { + pos position + exprs []any +} + +// nolint: structcheck +type throwExpr struct { + pos position + label string +} + +// nolint: structcheck +type labeledExpr struct { + pos position + label string + expr any +} + +// nolint: structcheck +type expr struct { + pos position + expr any +} + +type ( + andExpr expr // nolint: structcheck + notExpr expr // nolint: structcheck + zeroOrOneExpr expr // nolint: structcheck + zeroOrMoreExpr expr // nolint: structcheck + oneOrMoreExpr expr // nolint: structcheck +) + +// nolint: structcheck +type ruleRefExpr struct { + pos position + name string +} + +// nolint: 
structcheck +type stateCodeExpr struct { + pos position + run func(*parser) error +} + +// nolint: structcheck +type andCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type notCodeExpr struct { + pos position + run func(*parser) (bool, error) +} + +// nolint: structcheck +type litMatcher struct { + pos position + val string + ignoreCase bool + want string +} + +// nolint: structcheck +type charClassMatcher struct { + pos position + val string + basicLatinChars [128]bool + chars []rune + ranges []rune + classes []*unicode.RangeTable + ignoreCase bool + inverted bool +} + +type anyMatcher position // nolint: structcheck + +// errList cumulates the errors found by the parser. +type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. 
+func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. +func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. 
+ // For an alternative to be included in ChoiceAltCnt, it has to match at least once. + ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = 
p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. +func (p *parser) cloneState() storeDict { + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. 
+func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. + defer func() { + if e := recover(); e != nil { + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. 
+ maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + endMark := p.pt + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + if rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else { + val, ok = 
p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := p.parseExpr(expr) + + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + 
actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + return val, ok +} + +func (p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, 
start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + return val, ok + } + p.restoreState(state) + } + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() 
+ if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + vals := make([]any, 0, len(seq.exprs)) + + pt := p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion_state/standart/left_recursion_state.go 
b/test/left_recursion_state/standart/left_recursion_state.go new file mode 100644 index 0000000..78aefa1 --- /dev/null +++ b/test/left_recursion_state/standart/left_recursion_state.go @@ -0,0 +1,1765 @@ +// Code generated by pigeon; DO NOT EDIT. + +package leftrecursionstate + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "start", + pos: position{line: 5, col: 1, offset: 34}, + expr: &actionExpr{ + pos: position{line: 5, col: 9, offset: 42}, + run: (*parser).callonstart1, + expr: &seqExpr{ + pos: position{line: 5, col: 9, offset: 42}, + exprs: []any{ + &stateCodeExpr{ + pos: position{line: 5, col: 9, offset: 42}, + run: (*parser).callonstart3, + }, + &zeroOrOneExpr{ + pos: position{line: 10, col: 3, offset: 128}, + expr: &labeledExpr{ + pos: position{line: 10, col: 4, offset: 129}, + label: "a", + expr: &ruleRefExpr{ + pos: position{line: 10, col: 6, offset: 131}, + name: "expr", + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "expr", + pos: position{line: 14, col: 1, offset: 173}, + expr: &choiceExpr{ + pos: position{line: 14, col: 9, offset: 181}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 14, col: 9, offset: 181}, + exprs: []any{ + &seqExpr{ + pos: position{line: 14, col: 10, offset: 182}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 14, col: 10, offset: 182}, + name: "expr", + }, + &choiceExpr{ + pos: position{line: 14, col: 16, offset: 188}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 14, col: 16, offset: 188}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 14, col: 22, offset: 194}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 14, col: 27, offset: 199}, + name: "term", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 14, col: 33, 
offset: 205}, + run: (*parser).callonexpr9, + }, + }, + }, + &seqExpr{ + pos: position{line: 17, col: 5, offset: 278}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 17, col: 5, offset: 278}, + name: "term", + }, + &stateCodeExpr{ + pos: position{line: 17, col: 10, offset: 283}, + run: (*parser).callonexpr12, + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "term", + pos: position{line: 22, col: 1, offset: 355}, + expr: &choiceExpr{ + pos: position{line: 22, col: 8, offset: 362}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 22, col: 8, offset: 362}, + exprs: []any{ + &seqExpr{ + pos: position{line: 22, col: 9, offset: 363}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 22, col: 9, offset: 363}, + name: "term", + }, + &choiceExpr{ + pos: position{line: 22, col: 15, offset: 369}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 22, col: 15, offset: 369}, + val: "*", + ignoreCase: false, + want: "\"*\"", + }, + &litMatcher{ + pos: position{line: 22, col: 21, offset: 375}, + val: "/", + ignoreCase: false, + want: "\"/\"", + }, + &litMatcher{ + pos: position{line: 22, col: 27, offset: 381}, + val: "%", + ignoreCase: false, + want: "\"%\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 22, col: 32, offset: 386}, + name: "factor", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 22, col: 40, offset: 394}, + run: (*parser).callonterm10, + }, + }, + }, + &seqExpr{ + pos: position{line: 25, col: 5, offset: 467}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 25, col: 5, offset: 467}, + name: "factor", + }, + &stateCodeExpr{ + pos: position{line: 25, col: 12, offset: 474}, + run: (*parser).callonterm13, + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "factor", + pos: position{line: 30, col: 1, offset: 547}, + expr: &choiceExpr{ + pos: position{line: 30, col: 10, offset: 556}, + alternatives: []any{ + &seqExpr{ + pos: position{line: 30, col: 10, offset: 
556}, + exprs: []any{ + &seqExpr{ + pos: position{line: 30, col: 11, offset: 557}, + exprs: []any{ + &choiceExpr{ + pos: position{line: 30, col: 12, offset: 558}, + alternatives: []any{ + &litMatcher{ + pos: position{line: 30, col: 12, offset: 558}, + val: "+", + ignoreCase: false, + want: "\"+\"", + }, + &litMatcher{ + pos: position{line: 30, col: 18, offset: 564}, + val: "-", + ignoreCase: false, + want: "\"-\"", + }, + }, + }, + &ruleRefExpr{ + pos: position{line: 30, col: 23, offset: 569}, + name: "factor", + }, + }, + }, + &stateCodeExpr{ + pos: position{line: 30, col: 31, offset: 577}, + run: (*parser).callonfactor8, + }, + }, + }, + &seqExpr{ + pos: position{line: 33, col: 5, offset: 651}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 33, col: 5, offset: 651}, + name: "atom", + }, + &stateCodeExpr{ + pos: position{line: 33, col: 10, offset: 656}, + run: (*parser).callonfactor11, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "atom", + pos: position{line: 38, col: 1, offset: 729}, + expr: &seqExpr{ + pos: position{line: 38, col: 8, offset: 736}, + exprs: []any{ + &oneOrMoreExpr{ + pos: position{line: 38, col: 9, offset: 737}, + expr: &charClassMatcher{ + pos: position{line: 38, col: 9, offset: 737}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &stateCodeExpr{ + pos: position{line: 38, col: 17, offset: 745}, + run: (*parser).callonatom4, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onstart3() error { + + if _, ok := c.state["count"]; !ok { + c.state["count"] = 0 + } + return nil +} + +func (p *parser) callonstart3() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart3() +} + +func (c *current) onstart1() (any, error) { + return c.state["count"], nil +} + +func (p *parser) callonstart1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onstart1() +} + +func (c 
*current) onexpr9() error { + c.state["count"] = c.state["count"].(int) + 1 + return nil +} + +func (p *parser) callonexpr9() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr9() +} + +func (c *current) onexpr12() error { + c.state["count"] = c.state["count"].(int) + 3 + return nil +} + +func (p *parser) callonexpr12() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onexpr12() +} + +func (c *current) onterm10() error { + c.state["count"] = c.state["count"].(int) + 7 + return nil +} + +func (p *parser) callonterm10() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm10() +} + +func (c *current) onterm13() error { + c.state["count"] = c.state["count"].(int) + 15 + return nil +} + +func (p *parser) callonterm13() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onterm13() +} + +func (c *current) onfactor8() error { + c.state["count"] = c.state["count"].(int) + 31 + return nil +} + +func (p *parser) callonfactor8() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor8() +} + +func (c *current) onfactor11() error { + c.state["count"] = c.state["count"].(int) + 63 + return nil +} + +func (p *parser) callonfactor11() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onfactor11() +} + +func (c *current) onatom4() error { + c.state["count"] = c.state["count"].(int) + 127 + return nil +} + +func (p *parser) callonatom4() error { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onatom4() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. 
+ errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. 
+// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. 
When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. 
type position struct {
	line, col, offset int
}

// String formats the position as "line:col [offset]".
func (p position) String() string {
	out := make([]byte, 0, 24)
	out = strconv.AppendInt(out, int64(p.line), 10)
	out = append(out, ':')
	out = strconv.AppendInt(out, int64(p.col), 10)
	out = append(out, " ["...)
	out = strconv.AppendInt(out, int64(p.offset), 10)
	out = append(out, ']')
	return string(out)
}

// savepoint stores all state required to go back to this point in the
// parser: the position plus the rune found there and its width in bytes.
type savepoint struct {
	position
	rn rune
	w  int
}

// current is the context handed to the user code of the grammar.
type current struct {
	pos  position // start position of the match
	text []byte   // raw text of the match

	// state is a store for arbitrary key,value pairs that the user wants to be
	// tied to the backtracking of the parser.
	// This is always rolled back if a parsing rule fails.
	state storeDict

	// globalStore is a general store for the user to store arbitrary key-value
	// pairs that they need to manage and that they do not want tied to the
	// backtracking of the parser. This is only modified by the user and never
	// rolled back by the parser. It is always up to the user to keep this in a
	// consistent state.
	globalStore storeDict
}

// storeDict is the key/value store type used for both state and globalStore.
type storeDict map[string]any

// the AST types...
// grammar is the root node of the generated grammar tree: an ordered list of
// rules together with a source position.
// nolint: structcheck
type grammar struct {
	pos   position
	rules []*rule
}

// rule is one named rule of the grammar. expr holds the rule body as one of
// the expression node types declared below. leader and leftRecursive are
// emitted by the generator for every rule (see the grammar literal).
// nolint: structcheck
type rule struct {
	pos         position
	name        string
	displayName string
	expr        any

	leader        bool
	leftRecursive bool
}

// choiceExpr is a choice between several alternative expressions.
// nolint: structcheck
type choiceExpr struct {
	pos          position
	alternatives []any
}

// actionExpr wraps an expression together with the generated callback (run)
// for its action code; run returns the action's value and error.
// nolint: structcheck
type actionExpr struct {
	pos  position
	expr any
	run  func(*parser) (any, error)
}

// recoveryExpr associates an expression with a recovery expression and the
// failure labels that recovery expression is registered for.
// nolint: structcheck
type recoveryExpr struct {
	pos          position
	expr         any
	recoverExpr  any
	failureLabel []string
}

// seqExpr is a sequence of expressions.
// nolint: structcheck
type seqExpr struct {
	pos   position
	exprs []any
}

// throwExpr names a failure label.
// nolint: structcheck
type throwExpr struct {
	pos   position
	label string
}

// labeledExpr attaches a label to an expression.
// nolint: structcheck
type labeledExpr struct {
	pos   position
	label string
	expr  any
}

// expr is the common layout (position plus a single sub-expression) shared by
// the unary expression types declared right below.
// nolint: structcheck
type expr struct {
	pos  position
	expr any
}

// The unary expression node types; each one has the same fields as expr.
type (
	andExpr        expr // nolint: structcheck
	notExpr        expr // nolint: structcheck
	zeroOrOneExpr  expr // nolint: structcheck
	zeroOrMoreExpr expr // nolint: structcheck
	oneOrMoreExpr  expr // nolint: structcheck
)

// ruleRefExpr is a reference to another rule by name.
// nolint: structcheck
type ruleRefExpr struct {
	pos  position
	name string
}

// stateCodeExpr holds the generated callback (run) for a state code block;
// run only returns an error.
// nolint: structcheck
type stateCodeExpr struct {
	pos position
	run func(*parser) error
}

// andCodeExpr holds the generated callback (run) for an "and" code
// predicate; run returns a boolean and an error.
// nolint: structcheck
type andCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// notCodeExpr holds the generated callback (run) for a "not" code
// predicate; run returns a boolean and an error.
// nolint: structcheck
type notCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// litMatcher matches a literal string. val is the text to match; in the
// generated grammar literal, want carries the same text in quoted form
// (e.g. val "+" with want "\"+\"").
// nolint: structcheck
type litMatcher struct {
	pos        position
	val        string
	ignoreCase bool
	want       string
}

// charClassMatcher matches a character class. val is the class as written
// (e.g. "[0-9]"); ranges stores range bounds as consecutive low/high pairs
// (e.g. {'0', '9'} for "[0-9]").
// nolint: structcheck
type charClassMatcher struct {
	pos             position
	val             string
	basicLatinChars [128]bool
	chars           []rune
	ranges          []rune
	classes         []*unicode.RangeTable
	ignoreCase      bool
	inverted        bool
}

// anyMatcher is the node for the any matcher; it carries only a position.
type anyMatcher position // nolint: structcheck

// errList cumulates the errors found by the parser.
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + if p.debug { + defer p.out(p.in("recursive " + rule.name)) + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + 
endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := 
p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func 
(p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range 
chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + 
p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := 
p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion_thrownrecover/errors.go b/test/left_recursion_thrownrecover/errors.go new file mode 100644 index 0000000..dd0bee6 --- /dev/null +++ b/test/left_recursion_thrownrecover/errors.go @@ -0,0 +1,31 @@ +package leftrecursionthrownrecover + +// ErrorLister is the public interface to access the inner errors +// included in a errList. +type ErrorLister interface { + Errors() []error +} + +func (e errList) Errors() []error { + return e +} + +// ParserError is the public interface to errors of type parserError. 
+type ParserError interface { + Error() string + InnerError() error + Pos() (int, int, int) + Expected() []string +} + +func (p *parserError) InnerError() error { + return p.Inner +} + +func (p *parserError) Pos() (line, col, offset int) { + return p.pos.line, p.pos.col, p.pos.offset +} + +func (p *parserError) Expected() []string { + return p.expected +} diff --git a/test/left_recursion_thrownrecover/left_recursion_thrownrecover.go b/test/left_recursion_thrownrecover/left_recursion_thrownrecover.go new file mode 100644 index 0000000..624cd99 --- /dev/null +++ b/test/left_recursion_thrownrecover/left_recursion_thrownrecover.go @@ -0,0 +1,2588 @@ +// Code generated by pigeon; DO NOT EDIT. + +package leftrecursionthrownrecover + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" + "sync" + "unicode" + "unicode/utf8" +) + +var g = &grammar{ + rules: []*rule{ + { + name: "Start", + pos: position{line: 6, col: 1, offset: 41}, + expr: &andCodeExpr{ + pos: position{line: 6, col: 9, offset: 49}, + run: (*parser).callonStart1, + }, + leader: false, + leftRecursive: false, + }, + { + name: "case01", + pos: position{line: 11, col: 1, offset: 108}, + expr: &actionExpr{ + pos: position{line: 11, col: 10, offset: 117}, + run: (*parser).calloncase011, + expr: &labeledExpr{ + pos: position{line: 11, col: 10, offset: 117}, + label: "case01", + expr: &ruleRefExpr{ + pos: position{line: 11, col: 17, offset: 124}, + name: "MultiLabelRecover", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "MultiLabelRecover", + pos: position{line: 13, col: 1, offset: 167}, + expr: &recoveryExpr{ + pos: position{line: 13, col: 21, offset: 187}, + expr: &ruleRefExpr{ + pos: position{line: 13, col: 21, offset: 187}, + name: "number", + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 13, col: 51, offset: 217}, + name: "ErrNonNumber", + }, + failureLabel: []string{ + "errAlpha", + "errOther", + }, + }, + leader: false, + 
leftRecursive: false, + }, + { + name: "number", + pos: position{line: 15, col: 1, offset: 231}, + expr: &choiceExpr{ + pos: position{line: 15, col: 10, offset: 240}, + alternatives: []any{ + ¬Expr{ + pos: position{line: 15, col: 10, offset: 240}, + expr: &anyMatcher{ + line: 15, col: 11, offset: 241, + }, + }, + &actionExpr{ + pos: position{line: 15, col: 15, offset: 245}, + run: (*parser).callonnumber4, + expr: &seqExpr{ + pos: position{line: 15, col: 16, offset: 246}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 15, col: 16, offset: 246}, + label: "n", + expr: &ruleRefExpr{ + pos: position{line: 15, col: 18, offset: 248}, + name: "number", + }, + }, + &labeledExpr{ + pos: position{line: 15, col: 25, offset: 255}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 15, col: 27, offset: 257}, + name: "digit", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 17, col: 5, offset: 310}, + run: (*parser).callonnumber10, + expr: &labeledExpr{ + pos: position{line: 17, col: 5, offset: 310}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 17, col: 7, offset: 312}, + name: "digit", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "digit", + pos: position{line: 21, col: 1, offset: 350}, + expr: &choiceExpr{ + pos: position{line: 21, col: 9, offset: 358}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 21, col: 9, offset: 358}, + run: (*parser).callondigit2, + expr: &charClassMatcher{ + pos: position{line: 21, col: 9, offset: 358}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &actionExpr{ + pos: position{line: 23, col: 5, offset: 401}, + run: (*parser).callondigit4, + expr: &labeledExpr{ + pos: position{line: 23, col: 5, offset: 401}, + label: "x", + expr: &seqExpr{ + pos: position{line: 23, col: 9, offset: 405}, + exprs: []any{ + &andExpr{ + pos: position{line: 23, col: 9, offset: 405}, + expr: &charClassMatcher{ + pos: position{line: 23, col: 
10, offset: 406}, + val: "[a-z]", + ranges: []rune{'a', 'z'}, + ignoreCase: false, + inverted: false, + }, + }, + &throwExpr{ + pos: position{line: 23, col: 16, offset: 412}, + label: "errAlpha", + }, + }, + }, + }, + }, + &throwExpr{ + pos: position{line: 25, col: 5, offset: 461}, + label: "errOther", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrNonNumber", + pos: position{line: 27, col: 1, offset: 474}, + expr: &actionExpr{ + pos: position{line: 27, col: 16, offset: 489}, + run: (*parser).callonErrNonNumber1, + expr: &seqExpr{ + pos: position{line: 27, col: 16, offset: 489}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 27, col: 16, offset: 489}, + run: (*parser).callonErrNonNumber3, + }, + &zeroOrMoreExpr{ + pos: position{line: 29, col: 3, offset: 544}, + expr: &seqExpr{ + pos: position{line: 29, col: 5, offset: 546}, + exprs: []any{ + ¬Expr{ + pos: position{line: 29, col: 5, offset: 546}, + expr: &charClassMatcher{ + pos: position{line: 29, col: 6, offset: 547}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 29, col: 12, offset: 553, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "case02", + pos: position{line: 34, col: 1, offset: 615}, + expr: &choiceExpr{ + pos: position{line: 34, col: 11, offset: 625}, + alternatives: []any{ + &ruleRefExpr{ + pos: position{line: 34, col: 11, offset: 625}, + name: "ThrowUndefLabel", + }, + &andCodeExpr{ + pos: position{line: 34, col: 29, offset: 643}, + run: (*parser).calloncase023, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ThrowUndefLabel", + pos: position{line: 36, col: 1, offset: 702}, + expr: &seqExpr{ + pos: position{line: 36, col: 19, offset: 720}, + exprs: []any{ + &ruleRefExpr{ + pos: position{line: 36, col: 19, offset: 720}, + name: "ThrowUndefLabel", + }, + &throwExpr{ + pos: position{line: 36, col: 35, offset: 736}, + label: 
"undeflabel", + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "case03", + pos: position{line: 41, col: 1, offset: 780}, + expr: &actionExpr{ + pos: position{line: 41, col: 10, offset: 789}, + run: (*parser).calloncase031, + expr: &labeledExpr{ + pos: position{line: 41, col: 10, offset: 789}, + label: "case03", + expr: &ruleRefExpr{ + pos: position{line: 41, col: 17, offset: 796}, + name: "OuterRecover03", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "OuterRecover03", + pos: position{line: 43, col: 1, offset: 835}, + expr: &recoveryExpr{ + pos: position{line: 43, col: 18, offset: 852}, + expr: &recoveryExpr{ + pos: position{line: 43, col: 18, offset: 852}, + expr: &ruleRefExpr{ + pos: position{line: 43, col: 18, offset: 852}, + name: "InnerRecover03", + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 43, col: 66, offset: 900}, + name: "ErrAlphaOuter03", + }, + failureLabel: []string{ + "errAlphaLower", + "errAlphaUpper", + }, + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 43, col: 95, offset: 929}, + name: "ErrOtherOuter03", + }, + failureLabel: []string{ + "errOther", + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "InnerRecover03", + pos: position{line: 45, col: 1, offset: 946}, + expr: &recoveryExpr{ + pos: position{line: 45, col: 18, offset: 963}, + expr: &ruleRefExpr{ + pos: position{line: 45, col: 18, offset: 963}, + name: "number03", + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 45, col: 45, offset: 990}, + name: "ErrAlphaInner03", + }, + failureLabel: []string{ + "errAlphaLower", + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "number03", + pos: position{line: 47, col: 1, offset: 1007}, + expr: &choiceExpr{ + pos: position{line: 47, col: 12, offset: 1018}, + alternatives: []any{ + ¬Expr{ + pos: position{line: 47, col: 12, offset: 1018}, + expr: &anyMatcher{ + line: 47, col: 13, offset: 1019, + }, + }, + &actionExpr{ + pos: position{line: 47, 
col: 17, offset: 1023}, + run: (*parser).callonnumber034, + expr: &seqExpr{ + pos: position{line: 47, col: 18, offset: 1024}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 47, col: 18, offset: 1024}, + label: "n", + expr: &ruleRefExpr{ + pos: position{line: 47, col: 20, offset: 1026}, + name: "number03", + }, + }, + &labeledExpr{ + pos: position{line: 47, col: 29, offset: 1035}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 47, col: 31, offset: 1037}, + name: "digit03", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 49, col: 5, offset: 1092}, + run: (*parser).callonnumber0310, + expr: &labeledExpr{ + pos: position{line: 49, col: 5, offset: 1092}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 49, col: 7, offset: 1094}, + name: "digit03", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "digit03", + pos: position{line: 53, col: 1, offset: 1134}, + expr: &choiceExpr{ + pos: position{line: 53, col: 11, offset: 1144}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 53, col: 11, offset: 1144}, + run: (*parser).callondigit032, + expr: &charClassMatcher{ + pos: position{line: 53, col: 11, offset: 1144}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &actionExpr{ + pos: position{line: 55, col: 5, offset: 1187}, + run: (*parser).callondigit034, + expr: &labeledExpr{ + pos: position{line: 55, col: 5, offset: 1187}, + label: "x", + expr: &seqExpr{ + pos: position{line: 55, col: 9, offset: 1191}, + exprs: []any{ + &andExpr{ + pos: position{line: 55, col: 9, offset: 1191}, + expr: &charClassMatcher{ + pos: position{line: 55, col: 10, offset: 1192}, + val: "[a-z]", + ranges: []rune{'a', 'z'}, + ignoreCase: false, + inverted: false, + }, + }, + &throwExpr{ + pos: position{line: 55, col: 16, offset: 1198}, + label: "errAlphaLower", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 57, col: 5, offset: 1252}, + run: 
(*parser).callondigit0310, + expr: &labeledExpr{ + pos: position{line: 57, col: 5, offset: 1252}, + label: "x", + expr: &seqExpr{ + pos: position{line: 57, col: 9, offset: 1256}, + exprs: []any{ + &andExpr{ + pos: position{line: 57, col: 9, offset: 1256}, + expr: &charClassMatcher{ + pos: position{line: 57, col: 10, offset: 1257}, + val: "[A-Z]", + ranges: []rune{'A', 'Z'}, + ignoreCase: false, + inverted: false, + }, + }, + &throwExpr{ + pos: position{line: 57, col: 16, offset: 1263}, + label: "errAlphaUpper", + }, + }, + }, + }, + }, + &throwExpr{ + pos: position{line: 59, col: 5, offset: 1317}, + label: "errOther", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrAlphaInner03", + pos: position{line: 61, col: 1, offset: 1330}, + expr: &actionExpr{ + pos: position{line: 61, col: 19, offset: 1348}, + run: (*parser).callonErrAlphaInner031, + expr: &seqExpr{ + pos: position{line: 61, col: 19, offset: 1348}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 61, col: 19, offset: 1348}, + run: (*parser).callonErrAlphaInner033, + }, + &zeroOrMoreExpr{ + pos: position{line: 63, col: 3, offset: 1424}, + expr: &seqExpr{ + pos: position{line: 63, col: 5, offset: 1426}, + exprs: []any{ + ¬Expr{ + pos: position{line: 63, col: 5, offset: 1426}, + expr: &charClassMatcher{ + pos: position{line: 63, col: 6, offset: 1427}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 63, col: 12, offset: 1433, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrAlphaOuter03", + pos: position{line: 65, col: 1, offset: 1459}, + expr: &actionExpr{ + pos: position{line: 65, col: 19, offset: 1477}, + run: (*parser).callonErrAlphaOuter031, + expr: &seqExpr{ + pos: position{line: 65, col: 19, offset: 1477}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 65, col: 19, offset: 1477}, + run: (*parser).callonErrAlphaOuter033, + }, + &zeroOrMoreExpr{ + pos: 
position{line: 67, col: 3, offset: 1553}, + expr: &seqExpr{ + pos: position{line: 67, col: 5, offset: 1555}, + exprs: []any{ + ¬Expr{ + pos: position{line: 67, col: 5, offset: 1555}, + expr: &charClassMatcher{ + pos: position{line: 67, col: 6, offset: 1556}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 67, col: 12, offset: 1562, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrOtherOuter03", + pos: position{line: 69, col: 1, offset: 1588}, + expr: &actionExpr{ + pos: position{line: 69, col: 19, offset: 1606}, + run: (*parser).callonErrOtherOuter031, + expr: &seqExpr{ + pos: position{line: 69, col: 19, offset: 1606}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 69, col: 19, offset: 1606}, + run: (*parser).callonErrOtherOuter033, + }, + &zeroOrMoreExpr{ + pos: position{line: 71, col: 3, offset: 1677}, + expr: &seqExpr{ + pos: position{line: 71, col: 5, offset: 1679}, + exprs: []any{ + ¬Expr{ + pos: position{line: 71, col: 5, offset: 1679}, + expr: &charClassMatcher{ + pos: position{line: 71, col: 6, offset: 1680}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 71, col: 12, offset: 1686, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "case04", + pos: position{line: 76, col: 1, offset: 1771}, + expr: &actionExpr{ + pos: position{line: 76, col: 10, offset: 1780}, + run: (*parser).calloncase041, + expr: &labeledExpr{ + pos: position{line: 76, col: 10, offset: 1780}, + label: "case04", + expr: &ruleRefExpr{ + pos: position{line: 76, col: 17, offset: 1787}, + name: "OuterRecover04", + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "OuterRecover04", + pos: position{line: 78, col: 1, offset: 1826}, + expr: &recoveryExpr{ + pos: position{line: 78, col: 18, offset: 1843}, + expr: &recoveryExpr{ + pos: 
position{line: 78, col: 18, offset: 1843}, + expr: &ruleRefExpr{ + pos: position{line: 78, col: 18, offset: 1843}, + name: "InnerRecover04", + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 78, col: 66, offset: 1891}, + name: "ErrAlphaOuter04", + }, + failureLabel: []string{ + "errAlphaLower", + "errAlphaUpper", + }, + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 78, col: 95, offset: 1920}, + name: "ErrOtherOuter04", + }, + failureLabel: []string{ + "errOther", + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "InnerRecover04", + pos: position{line: 80, col: 1, offset: 1937}, + expr: &recoveryExpr{ + pos: position{line: 80, col: 18, offset: 1954}, + expr: &ruleRefExpr{ + pos: position{line: 80, col: 18, offset: 1954}, + name: "number04", + }, + recoverExpr: &ruleRefExpr{ + pos: position{line: 80, col: 45, offset: 1981}, + name: "ErrAlphaInner04", + }, + failureLabel: []string{ + "errAlphaLower", + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "number04", + pos: position{line: 82, col: 1, offset: 1998}, + expr: &choiceExpr{ + pos: position{line: 82, col: 12, offset: 2009}, + alternatives: []any{ + ¬Expr{ + pos: position{line: 82, col: 12, offset: 2009}, + expr: &anyMatcher{ + line: 82, col: 13, offset: 2010, + }, + }, + &actionExpr{ + pos: position{line: 82, col: 17, offset: 2014}, + run: (*parser).callonnumber044, + expr: &seqExpr{ + pos: position{line: 82, col: 18, offset: 2015}, + exprs: []any{ + &labeledExpr{ + pos: position{line: 82, col: 18, offset: 2015}, + label: "n", + expr: &ruleRefExpr{ + pos: position{line: 82, col: 20, offset: 2017}, + name: "number04", + }, + }, + &labeledExpr{ + pos: position{line: 82, col: 29, offset: 2026}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 82, col: 31, offset: 2028}, + name: "digit04", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 84, col: 5, offset: 2083}, + run: (*parser).callonnumber0410, + expr: &labeledExpr{ + pos: position{line: 84, 
col: 5, offset: 2083}, + label: "d", + expr: &ruleRefExpr{ + pos: position{line: 84, col: 7, offset: 2085}, + name: "digit04", + }, + }, + }, + }, + }, + leader: true, + leftRecursive: true, + }, + { + name: "digit04", + pos: position{line: 88, col: 1, offset: 2125}, + expr: &choiceExpr{ + pos: position{line: 88, col: 11, offset: 2135}, + alternatives: []any{ + &actionExpr{ + pos: position{line: 88, col: 11, offset: 2135}, + run: (*parser).callondigit042, + expr: &charClassMatcher{ + pos: position{line: 88, col: 11, offset: 2135}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &actionExpr{ + pos: position{line: 90, col: 5, offset: 2178}, + run: (*parser).callondigit044, + expr: &labeledExpr{ + pos: position{line: 90, col: 5, offset: 2178}, + label: "x", + expr: &seqExpr{ + pos: position{line: 90, col: 9, offset: 2182}, + exprs: []any{ + &andExpr{ + pos: position{line: 90, col: 9, offset: 2182}, + expr: &charClassMatcher{ + pos: position{line: 90, col: 10, offset: 2183}, + val: "[a-z]", + ranges: []rune{'a', 'z'}, + ignoreCase: false, + inverted: false, + }, + }, + &throwExpr{ + pos: position{line: 90, col: 16, offset: 2189}, + label: "errAlphaLower", + }, + }, + }, + }, + }, + &actionExpr{ + pos: position{line: 92, col: 5, offset: 2243}, + run: (*parser).callondigit0410, + expr: &labeledExpr{ + pos: position{line: 92, col: 5, offset: 2243}, + label: "x", + expr: &seqExpr{ + pos: position{line: 92, col: 9, offset: 2247}, + exprs: []any{ + &andExpr{ + pos: position{line: 92, col: 9, offset: 2247}, + expr: &charClassMatcher{ + pos: position{line: 92, col: 10, offset: 2248}, + val: "[A-Z]", + ranges: []rune{'A', 'Z'}, + ignoreCase: false, + inverted: false, + }, + }, + &throwExpr{ + pos: position{line: 92, col: 16, offset: 2254}, + label: "errAlphaUpper", + }, + }, + }, + }, + }, + &throwExpr{ + pos: position{line: 94, col: 5, offset: 2308}, + label: "errOther", + }, + }, + }, + leader: false, + leftRecursive: false, + 
}, + { + name: "ErrAlphaInner04", + pos: position{line: 96, col: 1, offset: 2321}, + expr: &andCodeExpr{ + pos: position{line: 96, col: 19, offset: 2339}, + run: (*parser).callonErrAlphaInner041, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrAlphaOuter04", + pos: position{line: 100, col: 1, offset: 2367}, + expr: &actionExpr{ + pos: position{line: 100, col: 19, offset: 2385}, + run: (*parser).callonErrAlphaOuter041, + expr: &seqExpr{ + pos: position{line: 100, col: 19, offset: 2385}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 100, col: 19, offset: 2385}, + run: (*parser).callonErrAlphaOuter043, + }, + &zeroOrMoreExpr{ + pos: position{line: 102, col: 3, offset: 2452}, + expr: &seqExpr{ + pos: position{line: 102, col: 5, offset: 2454}, + exprs: []any{ + ¬Expr{ + pos: position{line: 102, col: 5, offset: 2454}, + expr: &charClassMatcher{ + pos: position{line: 102, col: 6, offset: 2455}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 102, col: 12, offset: 2461, + }, + }, + }, + }, + }, + }, + }, + leader: false, + leftRecursive: false, + }, + { + name: "ErrOtherOuter04", + pos: position{line: 104, col: 1, offset: 2487}, + expr: &actionExpr{ + pos: position{line: 104, col: 19, offset: 2505}, + run: (*parser).callonErrOtherOuter041, + expr: &seqExpr{ + pos: position{line: 104, col: 19, offset: 2505}, + exprs: []any{ + &andCodeExpr{ + pos: position{line: 104, col: 19, offset: 2505}, + run: (*parser).callonErrOtherOuter043, + }, + &zeroOrMoreExpr{ + pos: position{line: 106, col: 3, offset: 2576}, + expr: &seqExpr{ + pos: position{line: 106, col: 5, offset: 2578}, + exprs: []any{ + ¬Expr{ + pos: position{line: 106, col: 5, offset: 2578}, + expr: &charClassMatcher{ + pos: position{line: 106, col: 6, offset: 2579}, + val: "[0-9]", + ranges: []rune{'0', '9'}, + ignoreCase: false, + inverted: false, + }, + }, + &anyMatcher{ + line: 106, col: 12, offset: 2585, + }, + }, + }, + }, 
+ }, + }, + }, + leader: false, + leftRecursive: false, + }, + }, +} + +func (c *current) onStart1() (bool, error) { + return false, nil +} + +func (p *parser) callonStart1() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onStart1() +} + +func (c *current) oncase011(case01 any) (any, error) { + return case01, nil +} + +func (p *parser) calloncase011() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.oncase011(stack["case01"]) +} + +func (c *current) onnumber4(n, d any) (any, error) { + return n.(string) + d.(string), nil +} + +func (p *parser) callonnumber4() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber4(stack["n"], stack["d"]) +} + +func (c *current) onnumber10(d any) (any, error) { + return d.(string), nil +} + +func (p *parser) callonnumber10() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber10(stack["d"]) +} + +func (c *current) ondigit2() (any, error) { + return string(c.text), nil +} + +func (p *parser) callondigit2() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit2() +} + +func (c *current) ondigit4(x any) (any, error) { + return x.([]any)[1], nil +} + +func (p *parser) callondigit4() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit4(stack["x"]) +} + +func (c *current) onErrNonNumber3() (bool, error) { + return true, errors.New("expecting a number") +} + +func (p *parser) callonErrNonNumber3() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrNonNumber3() +} + +func (c *current) onErrNonNumber1() (any, error) { + return "?", nil +} + +func (p *parser) callonErrNonNumber1() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrNonNumber1() +} + +func (c *current) oncase023() (bool, error) { + return false, errors.New("Throwed undefined label") +} + +func (p *parser) 
calloncase023() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.oncase023() +} + +func (c *current) oncase031(case03 any) (any, error) { + return case03, nil +} + +func (p *parser) calloncase031() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.oncase031(stack["case03"]) +} + +func (c *current) onnumber034(n, d any) (any, error) { + return n.(string) + d.(string), nil +} + +func (p *parser) callonnumber034() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber034(stack["n"], stack["d"]) +} + +func (c *current) onnumber0310(d any) (any, error) { + return d.(string), nil +} + +func (p *parser) callonnumber0310() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber0310(stack["d"]) +} + +func (c *current) ondigit032() (any, error) { + return string(c.text), nil +} + +func (p *parser) callondigit032() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit032() +} + +func (c *current) ondigit034(x any) (any, error) { + return x.([]any)[1], nil +} + +func (p *parser) callondigit034() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit034(stack["x"]) +} + +func (c *current) ondigit0310(x any) (any, error) { + return x.([]any)[1], nil +} + +func (p *parser) callondigit0310() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit0310(stack["x"]) +} + +func (c *current) onErrAlphaInner033() (bool, error) { + return true, errors.New("expecting a number, got lower case char") +} + +func (p *parser) callonErrAlphaInner033() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaInner033() +} + +func (c *current) onErrAlphaInner031() (any, error) { + return "<", nil +} + +func (p *parser) callonErrAlphaInner031() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaInner031() +} + 
+func (c *current) onErrAlphaOuter033() (bool, error) { + return true, errors.New("expecting a number, got upper case char") +} + +func (p *parser) callonErrAlphaOuter033() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaOuter033() +} + +func (c *current) onErrAlphaOuter031() (any, error) { + return ">", nil +} + +func (p *parser) callonErrAlphaOuter031() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaOuter031() +} + +func (c *current) onErrOtherOuter033() (bool, error) { + return true, errors.New("expecting a number, got a non-char") +} + +func (p *parser) callonErrOtherOuter033() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrOtherOuter033() +} + +func (c *current) onErrOtherOuter031() (any, error) { + return "?", nil +} + +func (p *parser) callonErrOtherOuter031() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrOtherOuter031() +} + +func (c *current) oncase041(case04 any) (any, error) { + return case04, nil +} + +func (p *parser) calloncase041() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.oncase041(stack["case04"]) +} + +func (c *current) onnumber044(n, d any) (any, error) { + return n.(string) + d.(string), nil +} + +func (p *parser) callonnumber044() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber044(stack["n"], stack["d"]) +} + +func (c *current) onnumber0410(d any) (any, error) { + return d.(string), nil +} + +func (p *parser) callonnumber0410() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onnumber0410(stack["d"]) +} + +func (c *current) ondigit042() (any, error) { + return string(c.text), nil +} + +func (p *parser) callondigit042() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit042() +} + +func (c *current) ondigit044(x any) (any, error) { + return 
x.([]any)[1], nil +} + +func (p *parser) callondigit044() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit044(stack["x"]) +} + +func (c *current) ondigit0410(x any) (any, error) { + return x.([]any)[1], nil +} + +func (p *parser) callondigit0410() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.ondigit0410(stack["x"]) +} + +func (c *current) onErrAlphaInner041() (bool, error) { + return false, nil +} + +func (p *parser) callonErrAlphaInner041() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaInner041() +} + +func (c *current) onErrAlphaOuter043() (bool, error) { + return true, errors.New("expecting a number, got a char") +} + +func (p *parser) callonErrAlphaOuter043() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaOuter043() +} + +func (c *current) onErrAlphaOuter041() (any, error) { + return "x", nil +} + +func (p *parser) callonErrAlphaOuter041() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrAlphaOuter041() +} + +func (c *current) onErrOtherOuter043() (bool, error) { + return true, errors.New("expecting a number, got a non-char") +} + +func (p *parser) callonErrOtherOuter043() (bool, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrOtherOuter043() +} + +func (c *current) onErrOtherOuter041() (any, error) { + return "?", nil +} + +func (p *parser) callonErrOtherOuter041() (any, error) { + stack := p.vstack[len(p.vstack)-1] + _ = stack + return p.cur.onErrOtherOuter041() +} + +var ( + // errNoRule is returned when the grammar to parse has no rule. + errNoRule = errors.New("grammar has no rule") + + // errInvalidEntrypoint is returned when the specified entrypoint rule + // does not exit. + errInvalidEntrypoint = errors.New("invalid entrypoint") + + // errInvalidEncoding is returned when the source is not properly + // utf8-encoded. 
+ errInvalidEncoding = errors.New("invalid encoding") + + // errMaxExprCnt is used to signal that the maximum number of + // expressions have been parsed. + errMaxExprCnt = errors.New("max number of expresssions parsed") +) + +// Option is a function that can set an option on the parser. It returns +// the previous setting as an Option. +type Option func(*parser) Option + +// MaxExpressions creates an Option to stop parsing after the provided +// number of expressions have been parsed, if the value is 0 then the parser will +// parse for as many steps as needed (possibly an infinite number). +// +// The default for maxExprCnt is 0. +func MaxExpressions(maxExprCnt uint64) Option { + return func(p *parser) Option { + oldMaxExprCnt := p.maxExprCnt + p.maxExprCnt = maxExprCnt + return MaxExpressions(oldMaxExprCnt) + } +} + +// Entrypoint creates an Option to set the rule name to use as entrypoint. +// The rule name must have been specified in the -alternate-entrypoints +// if generating the parser with the -optimize-grammar flag, otherwise +// it may have been optimized out. Passing an empty string sets the +// entrypoint to the first rule in the grammar. +// +// The default is to start parsing at the first rule in the grammar. +func Entrypoint(ruleName string) Option { + return func(p *parser) Option { + oldEntrypoint := p.entrypoint + p.entrypoint = ruleName + if ruleName == "" { + p.entrypoint = g.rules[0].name + } + return Entrypoint(oldEntrypoint) + } +} + +// Statistics adds a user provided Stats struct to the parser to allow +// the user to process the results after the parsing has finished. +// Also the key for the "no match" counter is set. 
+// +// Example usage: +// +// input := "input" +// stats := Stats{} +// _, err := Parse("input-file", []byte(input), Statistics(&stats, "no match")) +// if err != nil { +// log.Panicln(err) +// } +// b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " ") +// if err != nil { +// log.Panicln(err) +// } +// fmt.Println(string(b)) +func Statistics(stats *Stats, choiceNoMatch string) Option { + return func(p *parser) Option { + oldStats := p.Stats + p.Stats = stats + oldChoiceNoMatch := p.choiceNoMatch + p.choiceNoMatch = choiceNoMatch + if p.Stats.ChoiceAltCnt == nil { + p.Stats.ChoiceAltCnt = make(map[string]map[string]int) + } + return Statistics(oldStats, oldChoiceNoMatch) + } +} + +// Debug creates an Option to set the debug flag to b. When set to true, +// debugging information is printed to stdout while parsing. +// +// The default is false. +func Debug(b bool) Option { + return func(p *parser) Option { + old := p.debug + p.debug = b + return Debug(old) + } +} + +// Memoize creates an Option to set the memoize flag to b. When set to true, +// the parser will cache all results so each expression is evaluated only +// once. This guarantees linear parsing time even for pathological cases, +// at the expense of more memory and slower times for typical cases. +// +// The default is false. +func Memoize(b bool) Option { + return func(p *parser) Option { + old := p.memoize + p.memoize = b + return Memoize(old) + } +} + +// AllowInvalidUTF8 creates an Option to allow invalid UTF-8 bytes. +// Every invalid UTF-8 byte is treated as a utf8.RuneError (U+FFFD) +// by character class matchers and is matched by the any matcher. +// The returned matched value, c.text and c.offset are NOT affected. +// +// The default is false. +func AllowInvalidUTF8(b bool) Option { + return func(p *parser) Option { + old := p.allowInvalidUTF8 + p.allowInvalidUTF8 = b + return AllowInvalidUTF8(old) + } +} + +// Recover creates an Option to set the recover flag to b. 
When set to +// true, this causes the parser to recover from panics and convert it +// to an error. Setting it to false can be useful while debugging to +// access the full stack trace. +// +// The default is true. +func Recover(b bool) Option { + return func(p *parser) Option { + old := p.recover + p.recover = b + return Recover(old) + } +} + +// GlobalStore creates an Option to set a key to a certain value in +// the globalStore. +func GlobalStore(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.globalStore[key] + p.cur.globalStore[key] = value + return GlobalStore(key, old) + } +} + +// InitState creates an Option to set a key to a certain value in +// the global "state" store. +func InitState(key string, value any) Option { + return func(p *parser) Option { + old := p.cur.state[key] + p.cur.state[key] = value + return InitState(key, old) + } +} + +// ParseFile parses the file identified by filename. +func ParseFile(filename string, opts ...Option) (i any, err error) { // nolint: deadcode + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer func() { + if closeErr := f.Close(); closeErr != nil { + err = closeErr + } + }() + return ParseReader(filename, f, opts...) +} + +// ParseReader parses the data from r using filename as information in the +// error messages. +func ParseReader(filename string, r io.Reader, opts ...Option) (any, error) { // nolint: deadcode + b, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + return Parse(filename, b, opts...) +} + +// Parse parses the data from b using filename as information in the +// error messages. +func Parse(filename string, b []byte, opts ...Option) (any, error) { + return newParser(filename, b, opts...).parse(g) +} + +// position records a position in the text. 
type position struct {
	line, col, offset int
}

// String renders the position as "line:col [offset]".
func (p position) String() string {
	buf := make([]byte, 0, 24)
	buf = strconv.AppendInt(buf, int64(p.line), 10)
	buf = append(buf, ':')
	buf = strconv.AppendInt(buf, int64(p.col), 10)
	buf = append(buf, " ["...)
	buf = strconv.AppendInt(buf, int64(p.offset), 10)
	buf = append(buf, ']')
	return string(buf)
}

// savepoint holds everything needed to bring the parser back to a given
// point of the input: the position, the rune read there and its width in
// bytes.
type savepoint struct {
	position
	rn rune
	w  int
}

// current is the receiver of the action and predicate code of the grammar.
type current struct {
	pos  position // start position of the match
	text []byte   // raw text of the match

	// state holds arbitrary key/value pairs that the user wants tied to
	// the backtracking of the parser. It is always rolled back when a
	// parsing rule fails.
	state storeDict

	// globalStore holds arbitrary key/value pairs that the user manages
	// and that must not be tied to backtracking. Only the user modifies
	// it and the parser never rolls it back, so keeping it consistent is
	// the user's responsibility.
	globalStore storeDict
}

// storeDict is the key/value container used for both stores of current.
type storeDict map[string]any

// the AST types...

// grammar is the root of the generated AST: a position and the list of
// rules. The parser treats rules[0] as the default entrypoint.
//
// nolint: structcheck
type grammar struct {
	pos   position
	rules []*rule
}

// rule is a named grammar rule. name is the key used by rule references;
// displayName, when non-empty, is preferred over name in error prefixes.
//
// nolint: structcheck
type rule struct {
	pos         position
	name        string
	displayName string
	expr        any

	// Left-recursion metadata emitted by the generator; both flags are set
	// on the self-referencing rules of the grammar.
	// NOTE(review): presumably leader marks the rule that drives the
	// recursion cycle — confirm against the rule-evaluation code.
	leader        bool
	leftRecursive bool
}

// choiceExpr is an ordered choice between alternatives.
//
// nolint: structcheck
type choiceExpr struct {
	pos          position
	alternatives []any
}

// actionExpr wraps expr with a generated callback (one of the callon*
// methods) stored in run.
//
// nolint: structcheck
type actionExpr struct {
	pos  position
	expr any
	run  func(*parser) (any, error)
}

// recoveryExpr associates recoverExpr with the failure labels listed in
// failureLabel for the duration of expr.
//
// nolint: structcheck
type recoveryExpr struct {
	pos          position
	expr         any
	recoverExpr  any
	failureLabel []string
}

// seqExpr is a sequence of expressions.
//
// nolint: structcheck
type seqExpr struct {
	pos   position
	exprs []any
}

// throwExpr raises the failure label named by label.
//
// nolint: structcheck
type throwExpr struct {
	pos   position
	label string
}

// labeledExpr binds the value of expr to label; the generated callbacks
// read it from the variable stack under that name.
//
// nolint: structcheck
type labeledExpr struct {
	pos   position
	label string
	expr  any
}

// expr is the common shape of the single-operand expressions declared
// below.
//
// nolint: structcheck
type expr struct {
	pos  position
	expr any
}

// Single-operand expressions: the & and ! predicates and the ?, * and +
// repetitions.
type (
	andExpr        expr // nolint: structcheck
	notExpr        expr // nolint: structcheck
	zeroOrOneExpr  expr // nolint: structcheck
	zeroOrMoreExpr expr // nolint: structcheck
	oneOrMoreExpr  expr // nolint: structcheck
)

// ruleRefExpr is a reference to another rule by name.
//
// nolint: structcheck
type ruleRefExpr struct {
	pos  position
	name string
}

// stateCodeExpr is a code block whose callback returns only an error.
// NOTE(review): presumably used to update the parser state store — confirm.
//
// nolint: structcheck
type stateCodeExpr struct {
	pos position
	run func(*parser) error
}

// andCodeExpr is a code predicate; run returns the predicate result and an
// optional error.
//
// nolint: structcheck
type andCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// notCodeExpr is the negated counterpart of andCodeExpr and uses the same
// callback signature.
//
// nolint: structcheck
type notCodeExpr struct {
	pos position
	run func(*parser) (bool, error)
}

// litMatcher matches a literal string, optionally ignoring case.
// NOTE(review): want looks like the text shown in "expected" lists —
// confirm against the matcher code.
//
// nolint: structcheck
type litMatcher struct {
	pos        position
	val        string
	ignoreCase bool
	want       string
}

// charClassMatcher matches one rune against a character class. val is the
// class as written in the grammar (e.g. "[0-9]") and ranges holds
// consecutive low/high rune pairs (e.g. '0', '9').
//
// nolint: structcheck
type charClassMatcher struct {
	pos             position
	val             string
	basicLatinChars [128]bool // NOTE(review): presumably a lookup table for runes below 128 — confirm
	chars           []rune
	ranges          []rune
	classes         []*unicode.RangeTable
	ignoreCase      bool
	inverted        bool
}

// anyMatcher matches any rune (the "." expression); it only carries its
// position.
type anyMatcher position // nolint: structcheck

// errList cumulates the errors found by the parser.
+type errList []error + +func (e *errList) add(err error) { + *e = append(*e, err) +} + +func (e errList) err() error { + if len(e) == 0 { + return nil + } + e.dedupe() + return e +} + +func (e *errList) dedupe() { + var cleaned []error + set := make(map[string]bool) + for _, err := range *e { + if msg := err.Error(); !set[msg] { + set[msg] = true + cleaned = append(cleaned, err) + } + } + *e = cleaned +} + +func (e errList) Error() string { + switch len(e) { + case 0: + return "" + case 1: + return e[0].Error() + default: + var buf bytes.Buffer + + for i, err := range e { + if i > 0 { + buf.WriteRune('\n') + } + buf.WriteString(err.Error()) + } + return buf.String() + } +} + +// parserError wraps an error with a prefix indicating the rule in which +// the error occurred. The original error is stored in the Inner field. +type parserError struct { + Inner error + pos position + prefix string + expected []string +} + +// Error returns the error message. +func (p *parserError) Error() string { + return p.prefix + ": " + p.Inner.Error() +} + +// newParser creates a parser with the specified input source and options. +func newParser(filename string, b []byte, opts ...Option) *parser { + stats := Stats{ + ChoiceAltCnt: make(map[string]map[string]int), + } + + p := &parser{ + filename: filename, + errs: new(errList), + data: b, + pt: savepoint{position: position{line: 1}}, + recover: true, + cur: current{ + state: make(storeDict), + globalStore: make(storeDict), + }, + maxFailPos: position{col: 1, line: 1}, + maxFailExpected: make([]string, 0, 20), + Stats: &stats, + // start rule is rule [0] unless an alternate entrypoint is specified + entrypoint: g.rules[0].name, + } + p.setOptions(opts) + + if p.maxExprCnt == 0 { + p.maxExprCnt = math.MaxUint64 + } + + return p +} + +// setOptions applies the options to the parser. 
+func (p *parser) setOptions(opts []Option) { + for _, opt := range opts { + opt(p) + } +} + +// nolint: structcheck,deadcode +type resultTuple struct { + v any + b bool + end savepoint +} + +// nolint: varcheck +const choiceNoMatch = -1 + +// Stats stores some statistics, gathered during parsing +type Stats struct { + // ExprCnt counts the number of expressions processed during parsing + // This value is compared to the maximum number of expressions allowed + // (set by the MaxExpressions option). + ExprCnt uint64 + + // ChoiceAltCnt is used to count for each ordered choice expression, + // which alternative is used how may times. + // These numbers allow to optimize the order of the ordered choice expression + // to increase the performance of the parser + // + // The outer key of ChoiceAltCnt is composed of the name of the rule as well + // as the line and the column of the ordered choice. + // The inner key of ChoiceAltCnt is the number (one-based) of the matching alternative. + // For each alternative the number of matches are counted. If an ordered choice does not + // match, a special counter is incremented. The name of this counter is set with + // the parser option Statistics. + // For an alternative to be included in ChoiceAltCnt, it has to match at least once. 
+ ChoiceAltCnt map[string]map[string]int +} + +type ruleWithExpsStack struct { + rule *rule + estack []any +} + +// nolint: structcheck,maligned +type parser struct { + filename string + pt savepoint + cur current + + data []byte + errs *errList + + depth int + recover bool + debug bool + + memoize bool + // memoization table for the packrat algorithm: + // map[offset in source] map[expression or rule] {value, match} + memo map[int]map[any]resultTuple + + // rules table, maps the rule identifier to the rule node + rules map[string]*rule + // variables stack, map of label to value + vstack []map[string]any + // rule stack, allows identification of the current rule in errors + rstack []*rule + + // parse fail + maxFailPos position + maxFailExpected []string + maxFailInvertExpected bool + + // max number of expressions to be parsed + maxExprCnt uint64 + // entrypoint for the parser + entrypoint string + + allowInvalidUTF8 bool + + *Stats + + choiceNoMatch string + // recovery expression stack, keeps track of the currently available recovery expression, these are traversed in reverse + recoveryStack []map[string]any +} + +// push a variable set on the vstack. +func (p *parser) pushV() { + if cap(p.vstack) == len(p.vstack) { + // create new empty slot in the stack + p.vstack = append(p.vstack, nil) + } else { + // slice to 1 more + p.vstack = p.vstack[:len(p.vstack)+1] + } + + // get the last args set + m := p.vstack[len(p.vstack)-1] + if m != nil && len(m) == 0 { + // empty map, all good + return + } + + m = make(map[string]any) + p.vstack[len(p.vstack)-1] = m +} + +// pop a variable set from the vstack. 
+func (p *parser) popV() { + // if the map is not empty, clear it + m := p.vstack[len(p.vstack)-1] + if len(m) > 0 { + // GC that map + p.vstack[len(p.vstack)-1] = nil + } + p.vstack = p.vstack[:len(p.vstack)-1] +} + +// push a recovery expression with its labels to the recoveryStack +func (p *parser) pushRecovery(labels []string, expr any) { + if cap(p.recoveryStack) == len(p.recoveryStack) { + // create new empty slot in the stack + p.recoveryStack = append(p.recoveryStack, nil) + } else { + // slice to 1 more + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)+1] + } + + m := make(map[string]any, len(labels)) + for _, fl := range labels { + m[fl] = expr + } + p.recoveryStack[len(p.recoveryStack)-1] = m +} + +// pop a recovery expression from the recoveryStack +func (p *parser) popRecovery() { + // GC that map + p.recoveryStack[len(p.recoveryStack)-1] = nil + + p.recoveryStack = p.recoveryStack[:len(p.recoveryStack)-1] +} + +func (p *parser) print(prefix, s string) string { + if !p.debug { + return s + } + + fmt.Printf("%s %d:%d:%d: %s [%#U]\n", + prefix, p.pt.line, p.pt.col, p.pt.offset, s, p.pt.rn) + return s +} + +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + +func (p *parser) in(s string) string { + res := p.printIndent(">", s) + p.depth++ + return res +} + +func (p *parser) out(s string) string { + p.depth-- + return p.printIndent("<", s) +} + +func (p *parser) addErr(err error) { + p.addErrAt(err, p.pt.position, []string{}) +} + +func (p *parser) addErrAt(err error, pos position, expected []string) { + var buf bytes.Buffer + if p.filename != "" { + buf.WriteString(p.filename) + } + if buf.Len() > 0 { + buf.WriteString(":") + } + buf.WriteString(fmt.Sprintf("%d:%d (%d)", pos.line, pos.col, pos.offset)) + if len(p.rstack) > 0 { + if buf.Len() > 0 { + buf.WriteString(": ") + } + rule := p.rstack[len(p.rstack)-1] + if rule.displayName != "" { + buf.WriteString("rule " + 
rule.displayName) + } else { + buf.WriteString("rule " + rule.name) + } + } + pe := &parserError{Inner: err, pos: pos, prefix: buf.String(), expected: expected} + p.errs.add(pe) +} + +func (p *parser) failAt(fail bool, pos position, want string) { + // process fail if parsing fails and not inverted or parsing succeeds and invert is set + if fail == p.maxFailInvertExpected { + if pos.offset < p.maxFailPos.offset { + return + } + + if pos.offset > p.maxFailPos.offset { + p.maxFailPos = pos + p.maxFailExpected = p.maxFailExpected[:0] + } + + if p.maxFailInvertExpected { + want = "!" + want + } + p.maxFailExpected = append(p.maxFailExpected, want) + } +} + +// read advances the parser to the next rune. +func (p *parser) read() { + p.pt.offset += p.pt.w + rn, n := utf8.DecodeRune(p.data[p.pt.offset:]) + p.pt.rn = rn + p.pt.w = n + p.pt.col++ + if rn == '\n' { + p.pt.line++ + p.pt.col = 0 + } + + if rn == utf8.RuneError && n == 1 { // see utf8.DecodeRune + if !p.allowInvalidUTF8 { + p.addErr(errInvalidEncoding) + } + } +} + +// restore parser position to the savepoint pt. +func (p *parser) restore(pt savepoint) { + if p.debug { + defer p.out(p.in("restore")) + } + if pt.offset == p.pt.offset { + return + } + p.pt = pt +} + +// Cloner is implemented by any value that has a Clone method, which returns a +// copy of the value. This is mainly used for types which are not passed by +// value (e.g map, slice, chan) or structs that contain such types. +// +// This is used in conjunction with the global state feature to create proper +// copies of the state to allow the parser to properly restore the state in +// the case of backtracking. +type Cloner interface { + Clone() any +} + +var statePool = &sync.Pool{ + New: func() any { return make(storeDict) }, +} + +func (sd storeDict) Discard() { + for k := range sd { + delete(sd, k) + } + statePool.Put(sd) +} + +// clone and return parser current state. 
+func (p *parser) cloneState() storeDict { + if p.debug { + defer p.out(p.in("cloneState")) + } + + state := statePool.Get().(storeDict) + for k, v := range p.cur.state { + if c, ok := v.(Cloner); ok { + state[k] = c.Clone() + } else { + state[k] = v + } + } + return state +} + +// restore parser current state to the state storeDict. +// every restoreState should applied only one time for every cloned state +func (p *parser) restoreState(state storeDict) { + if p.debug { + defer p.out(p.in("restoreState")) + } + p.cur.state.Discard() + p.cur.state = state +} + +// get the slice of bytes from the savepoint start to the current position. +func (p *parser) sliceFrom(start savepoint) []byte { + return p.data[start.position.offset:p.pt.position.offset] +} + +func (p *parser) getMemoized(node any) (resultTuple, bool) { + if len(p.memo) == 0 { + return resultTuple{}, false + } + m := p.memo[p.pt.offset] + if len(m) == 0 { + return resultTuple{}, false + } + res, ok := m[node] + return res, ok +} + +func (p *parser) setMemoized(pt savepoint, node any, tuple resultTuple) { + if p.memo == nil { + p.memo = make(map[int]map[any]resultTuple) + } + m := p.memo[pt.offset] + if m == nil { + m = make(map[any]resultTuple) + p.memo[pt.offset] = m + } + m[node] = tuple +} + +func (p *parser) buildRulesTable(g *grammar) { + p.rules = make(map[string]*rule, len(g.rules)) + for _, r := range g.rules { + p.rules[r.name] = r + } +} + +// nolint: gocyclo +func (p *parser) parse(g *grammar) (val any, err error) { + if len(g.rules) == 0 { + p.addErr(errNoRule) + return nil, p.errs.err() + } + + // TODO : not super critical but this could be generated + p.buildRulesTable(g) + + if p.recover { + // panic can be used in action code to stop parsing immediately + // and return the panic as an error. 
+ defer func() { + if e := recover(); e != nil { + if p.debug { + defer p.out(p.in("panic handler")) + } + val = nil + switch e := e.(type) { + case error: + p.addErr(e) + default: + p.addErr(fmt.Errorf("%v", e)) + } + err = p.errs.err() + } + }() + } + + startRule, ok := p.rules[p.entrypoint] + if !ok { + p.addErr(errInvalidEntrypoint) + return nil, p.errs.err() + } + + p.read() // advance to first rune + val, ok = p.parseRuleWrap(startRule) + if !ok { + if len(*p.errs) == 0 { + // If parsing fails, but no errors have been recorded, the expected values + // for the farthest parser position are returned as error. + maxFailExpectedMap := make(map[string]struct{}, len(p.maxFailExpected)) + for _, v := range p.maxFailExpected { + maxFailExpectedMap[v] = struct{}{} + } + expected := make([]string, 0, len(maxFailExpectedMap)) + eof := false + if _, ok := maxFailExpectedMap["!."]; ok { + delete(maxFailExpectedMap, "!.") + eof = true + } + for k := range maxFailExpectedMap { + expected = append(expected, k) + } + sort.Strings(expected) + if eof { + expected = append(expected, "EOF") + } + p.addErrAt(errors.New("no match found, expected: "+listJoin(expected, ", ", "or")), p.maxFailPos, expected) + } + + return nil, p.errs.err() + } + return val, p.errs.err() +} + +func listJoin(list []string, sep string, lastSep string) string { + switch len(list) { + case 0: + return "" + case 1: + return list[0] + default: + return strings.Join(list[:len(list)-1], sep) + " " + lastSep + " " + list[len(list)-1] + } +} + +func (p *parser) parseRuleRecursiveLeader(rule *rule) (any, bool) { + result, ok := p.getMemoized(rule) + if ok { + p.restore(result.end) + return result.v, result.b + } + + if p.debug { + defer p.out(p.in("recursive " + rule.name)) + } + + var ( + depth = 0 + startMark = p.pt + lastResult = resultTuple{nil, false, startMark} + lastErrors = *p.errs + ) + + for { + lastState := p.cloneState() + p.setMemoized(startMark, rule, lastResult) + val, ok := p.parseRule(rule) + 
endMark := p.pt + if p.debug { + p.printIndent("RECURSIVE", fmt.Sprintf( + "Rule %s depth %d: %t -> %s", + rule.name, depth, ok, string(p.sliceFrom(startMark)))) + } + if (!ok) || (endMark.offset <= lastResult.end.offset && depth != 0) { + p.restoreState(lastState) + *p.errs = lastErrors + break + } + lastResult = resultTuple{val, ok, endMark} + lastErrors = *p.errs + p.restore(startMark) + depth++ + } + + p.restore(lastResult.end) + p.setMemoized(startMark, rule, lastResult) + return lastResult.v, lastResult.b +} + +func (p *parser) parseRuleRecursiveNoLeader(rule *rule) (any, bool) { + return p.parseRule(rule) +} + +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + if p.debug { + defer p.out(p.in("parseRule " + rule.name)) + } + var ( + val any + ok bool + startMark = p.pt + ) + + if p.memoize || rule.leftRecursive { + if rule.leader { + val, ok = p.parseRuleRecursiveLeader(rule) + } else if p.memoize && !rule.leftRecursive { + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRuleRecursiveNoLeader(rule) + } + } else { + val, ok = p.parseRule(rule) + } + + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { + p.rstack = append(p.rstack, rule) + p.pushV() + val, ok := p.parseExprWrap(rule.expr) + p.popV() + p.rstack = p.rstack[:len(p.rstack)-1] + return val, ok +} + +func (p *parser) parseExprWrap(expr any) (any, bool) { + var pt savepoint + + isLeftRecusion := p.rstack[len(p.rstack)-1].leftRecursive + if p.memoize && !isLeftRecusion { + res, ok := p.getMemoized(expr) + if ok { + p.restore(res.end) + return res.v, res.b + } + pt = p.pt + } + + val, ok := 
p.parseExpr(expr) + + if p.memoize && !isLeftRecusion { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { + p.ExprCnt++ + if p.ExprCnt > p.maxExprCnt { + panic(errMaxExprCnt) + } + + var val any + var ok bool + switch expr := expr.(type) { + case *actionExpr: + val, ok = p.parseActionExpr(expr) + case *andCodeExpr: + val, ok = p.parseAndCodeExpr(expr) + case *andExpr: + val, ok = p.parseAndExpr(expr) + case *anyMatcher: + val, ok = p.parseAnyMatcher(expr) + case *charClassMatcher: + val, ok = p.parseCharClassMatcher(expr) + case *choiceExpr: + val, ok = p.parseChoiceExpr(expr) + case *labeledExpr: + val, ok = p.parseLabeledExpr(expr) + case *litMatcher: + val, ok = p.parseLitMatcher(expr) + case *notCodeExpr: + val, ok = p.parseNotCodeExpr(expr) + case *notExpr: + val, ok = p.parseNotExpr(expr) + case *oneOrMoreExpr: + val, ok = p.parseOneOrMoreExpr(expr) + case *recoveryExpr: + val, ok = p.parseRecoveryExpr(expr) + case *ruleRefExpr: + val, ok = p.parseRuleRefExpr(expr) + case *seqExpr: + val, ok = p.parseSeqExpr(expr) + case *stateCodeExpr: + val, ok = p.parseStateCodeExpr(expr) + case *throwExpr: + val, ok = p.parseThrowExpr(expr) + case *zeroOrMoreExpr: + val, ok = p.parseZeroOrMoreExpr(expr) + case *zeroOrOneExpr: + val, ok = p.parseZeroOrOneExpr(expr) + default: + panic(fmt.Sprintf("unknown expression type %T", expr)) + } + return val, ok +} + +func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseActionExpr")) + } + + start := p.pt + val, ok := p.parseExprWrap(act.expr) + if ok { + p.cur.pos = start.position + p.cur.text = p.sliceFrom(start) + state := p.cloneState() + actVal, err := act.run(p) + if err != nil { + p.addErrAt(err, start.position, []string{}) + } + p.restoreState(state) + + val = actVal + } + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(start))) + } + return val, ok +} + +func 
(p *parser) parseAndCodeExpr(and *andCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndCodeExpr")) + } + + state := p.cloneState() + + ok, err := and.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, ok +} + +func (p *parser) parseAndExpr(and *andExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseAndExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + _, ok := p.parseExprWrap(and.expr) + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, ok +} + +func (p *parser) parseAnyMatcher(any *anyMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseAnyMatcher")) + } + + if p.pt.rn == utf8.RuneError && p.pt.w == 0 { + // EOF - see utf8.DecodeRune + p.failAt(false, p.pt.position, ".") + return nil, false + } + start := p.pt + p.read() + p.failAt(true, start.position, ".") + return p.sliceFrom(start), true +} + +// nolint: gocyclo +func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseCharClassMatcher")) + } + + cur := p.pt.rn + start := p.pt + + // can't match EOF + if cur == utf8.RuneError && p.pt.w == 0 { // see utf8.DecodeRune + p.failAt(false, start.position, chr.val) + return nil, false + } + + if chr.ignoreCase { + cur = unicode.ToLower(cur) + } + + // try to match in the list of available chars + for _, rn := range chr.chars { + if rn == cur { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of ranges + for i := 0; i < len(chr.ranges); i += 2 { + if cur >= chr.ranges[i] && cur <= chr.ranges[i+1] { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + // try to match in the list of Unicode classes + for _, cl := range 
chr.classes { + if unicode.Is(cl, cur) { + if chr.inverted { + p.failAt(false, start.position, chr.val) + return nil, false + } + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + } + + if chr.inverted { + p.read() + p.failAt(true, start.position, chr.val) + return p.sliceFrom(start), true + } + p.failAt(false, start.position, chr.val) + return nil, false +} + +func (p *parser) incChoiceAltCnt(ch *choiceExpr, altI int) { + choiceIdent := fmt.Sprintf("%s %d:%d", p.rstack[len(p.rstack)-1].name, ch.pos.line, ch.pos.col) + m := p.ChoiceAltCnt[choiceIdent] + if m == nil { + m = make(map[string]int) + p.ChoiceAltCnt[choiceIdent] = m + } + // We increment altI by 1, so the keys do not start at 0 + alt := strconv.Itoa(altI + 1) + if altI == choiceNoMatch { + alt = p.choiceNoMatch + } + m[alt]++ +} + +func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseChoiceExpr")) + } + + for altI, alt := range ch.alternatives { + // dummy assignment to prevent compile error if optimized + _ = altI + + state := p.cloneState() + + p.pushV() + val, ok := p.parseExprWrap(alt) + p.popV() + if ok { + p.incChoiceAltCnt(ch, altI) + return val, ok + } + p.restoreState(state) + } + p.incChoiceAltCnt(ch, choiceNoMatch) + return nil, false +} + +func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseLabeledExpr")) + } + + p.pushV() + val, ok := p.parseExprWrap(lab.expr) + p.popV() + if ok && lab.label != "" { + m := p.vstack[len(p.vstack)-1] + m[lab.label] = val + } + return val, ok +} + +func (p *parser) parseLitMatcher(lit *litMatcher) (any, bool) { + if p.debug { + defer p.out(p.in("parseLitMatcher")) + } + + start := p.pt + for _, want := range lit.val { + cur := p.pt.rn + if lit.ignoreCase { + cur = unicode.ToLower(cur) + } + if cur != want { + p.failAt(false, start.position, lit.want) + p.restore(start) + return nil, false + } + p.read() + } + 
p.failAt(true, start.position, lit.want) + return p.sliceFrom(start), true +} + +func (p *parser) parseNotCodeExpr(not *notCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotCodeExpr")) + } + + state := p.cloneState() + + ok, err := not.run(p) + if err != nil { + p.addErr(err) + } + p.restoreState(state) + + return nil, !ok +} + +func (p *parser) parseNotExpr(not *notExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseNotExpr")) + } + + pt := p.pt + state := p.cloneState() + p.pushV() + p.maxFailInvertExpected = !p.maxFailInvertExpected + _, ok := p.parseExprWrap(not.expr) + p.maxFailInvertExpected = !p.maxFailInvertExpected + p.popV() + p.restoreState(state) + p.restore(pt) + + return nil, !ok +} + +func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseOneOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + if len(vals) == 0 { + // did not match once, no match + return nil, false + } + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRecoveryExpr (" + strings.Join(recover.failureLabel, ",") + ")")) + } + + p.pushRecovery(recover.failureLabel, recover.recoverExpr) + val, ok := p.parseExprWrap(recover.expr) + p.popRecovery() + + return val, ok +} + +func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseRuleRefExpr " + ref.name)) + } + + if ref.name == "" { + panic(fmt.Sprintf("%s: invalid rule: missing name", ref.pos)) + } + + rule := p.rules[ref.name] + if rule == nil { + p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) + return nil, false + } + return p.parseRuleWrap(rule) +} + +func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseSeqExpr")) + } + + vals := make([]any, 0, len(seq.exprs)) + + pt := 
p.pt + state := p.cloneState() + for _, expr := range seq.exprs { + val, ok := p.parseExprWrap(expr) + if !ok { + p.restoreState(state) + p.restore(pt) + return nil, false + } + vals = append(vals, val) + } + return vals, true +} + +func (p *parser) parseStateCodeExpr(state *stateCodeExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseStateCodeExpr")) + } + + err := state.run(p) + if err != nil { + p.addErr(err) + } + return nil, true +} + +func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseThrowExpr")) + } + + for i := len(p.recoveryStack) - 1; i >= 0; i-- { + if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { + return val, ok + } + } + } + + return nil, false +} + +func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrMoreExpr")) + } + + var vals []any + + for { + p.pushV() + val, ok := p.parseExprWrap(expr.expr) + p.popV() + if !ok { + return vals, true + } + vals = append(vals, val) + } +} + +func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { + if p.debug { + defer p.out(p.in("parseZeroOrOneExpr")) + } + + p.pushV() + val, _ := p.parseExprWrap(expr.expr) + p.popV() + // whether it matched or not, consider it a match + return val, true +} diff --git a/test/left_recursion_thrownrecover/left_recursion_thrownrecover.peg b/test/left_recursion_thrownrecover/left_recursion_thrownrecover.peg new file mode 100644 index 0000000..bdd5f72 --- /dev/null +++ b/test/left_recursion_thrownrecover/left_recursion_thrownrecover.peg @@ -0,0 +1,106 @@ +{ +package leftrecursionthrownrecover + +} + +Start = &{return false, nil} + + +// Case 01: Multiple Label Recover + +case01 = case01:MultiLabelRecover { return case01, nil } + +MultiLabelRecover = number //{errAlpha, errOther} ErrNonNumber + +number = !. 
/ (n:number d:digit) { + return n.(string) + d.(string), nil +} / d:digit { + return d.(string), nil +} + +digit = [0-9] { + return string(c.text), nil +} / x:( &[a-z] %{errAlpha} ) { + return x.([]any)[1], nil +} / %{errOther} + +ErrNonNumber = &{ + return true, errors.New("expecting a number") +} ( ![0-9] . )* { return "?", nil } + + +// Case 02: Throw Undefined Label + +case02 = (ThrowUndefLabel / &{ return false, errors.New("Throwed undefined label") }) + +ThrowUndefLabel = ThrowUndefLabel %{undeflabel} + + +// Case 03: Nested Recover + +case03 = case03:OuterRecover03 { return case03, nil } + +OuterRecover03 = InnerRecover03 //{errAlphaLower, errAlphaUpper} ErrAlphaOuter03 //{errOther} ErrOtherOuter03 + +InnerRecover03 = number03 //{errAlphaLower} ErrAlphaInner03 + +number03 = !. / (n:number03 d:digit03) { + return n.(string) + d.(string), nil +} / d:digit03 { + return d.(string), nil +} + +digit03 = [0-9] { + return string(c.text), nil +} / x:( &[a-z] %{errAlphaLower} ) { + return x.([]any)[1], nil +} / x:( &[A-Z] %{errAlphaUpper} ) { + return x.([]any)[1], nil +} / %{errOther} + +ErrAlphaInner03 = &{ + return true, errors.New("expecting a number, got lower case char") +} ( ![0-9] . )* { return "<", nil } + +ErrAlphaOuter03 = &{ + return true, errors.New("expecting a number, got upper case char") +} ( ![0-9] . )* { return ">", nil } + +ErrOtherOuter03 = &{ + return true, errors.New("expecting a number, got a non-char") +} ( ![0-9] . )* { return "?", nil } + + +// Case 04: Nested Recover, which fails in inner recover + +case04 = case04:OuterRecover04 { return case04, nil } + +OuterRecover04 = InnerRecover04 //{errAlphaLower, errAlphaUpper} ErrAlphaOuter04 //{errOther} ErrOtherOuter04 + +InnerRecover04 = number04 //{errAlphaLower} ErrAlphaInner04 + +number04 = !. 
/ (n:number04 d:digit04) { + return n.(string) + d.(string), nil +} / d:digit04 { + return d.(string), nil +} + +digit04 = [0-9] { + return string(c.text), nil +} / x:( &[a-z] %{errAlphaLower} ) { + return x.([]any)[1], nil +} / x:( &[A-Z] %{errAlphaUpper} ) { + return x.([]any)[1], nil +} / %{errOther} + +ErrAlphaInner04 = &{ + return false, nil +} + +ErrAlphaOuter04 = &{ + return true, errors.New("expecting a number, got a char") +} ( ![0-9] . )* { return "x", nil } + +ErrOtherOuter04 = &{ + return true, errors.New("expecting a number, got a non-char") +} ( ![0-9] . )* { return "?", nil } diff --git a/test/left_recursion_thrownrecover/left_recursion_thrownrecover_test.go b/test/left_recursion_thrownrecover/left_recursion_thrownrecover_test.go new file mode 100644 index 0000000..e141f79 --- /dev/null +++ b/test/left_recursion_thrownrecover/left_recursion_thrownrecover_test.go @@ -0,0 +1,221 @@ +package leftrecursionthrownrecover_test + +import ( + "errors" + "reflect" + "testing" + + leftrecursionthrownrecover "github.com/mna/pigeon/test/left_recursion_thrownrecover" +) + +func TestLeftRecursionWithThrowAndRecover(t *testing.T) { + t.Parallel() + + type want struct { + captures any + errors []string + } + + cases := []struct { + name string + entrypoint string + input string + want want + }{ + // Case 01: Recover multiple labels + { + name: "Case 01: Recover multiple labels[correct]", + entrypoint: "case01", + input: "123", + want: want{captures: "123"}, + }, + { + name: "Case 01: Recover multiple labels[second character is not a number]", + entrypoint: "case01", + input: "1a3", + want: want{ + captures: "1?3", + errors: []string{ + "1:2 (1): rule ErrNonNumber: expecting a number", + }, + }, + }, + { + name: "Case 01: Recover multiple labels[third character is not a number]", + entrypoint: "case01", + input: "11+3", + want: want{ + captures: "11?3", + errors: []string{ + "1:3 (2): rule ErrNonNumber: expecting a number", + }, + }, + }, + + // Case 02: Throw a 
undefined label + { + name: "Case 02: Throw a undefined label", + entrypoint: "case02", + input: "", + want: want{ + captures: nil, + errors: []string{ + "1:1 (0): rule case02: Throwed undefined label", + }, + }, + }, + + // Case 03: Nested Recover + { + name: "Case 03: Nested Recover[correct]", + entrypoint: "case03", + input: "123", + want: want{captures: "123"}, + }, + { + name: "Case 03: Nested Recover[second character is lower case char]", + entrypoint: "case03", + input: "1a3", + want: want{ + captures: "1<3", + errors: []string{ + "1:2 (1): rule ErrAlphaInner03: expecting a number, got lower case char", + }, + }, + }, + { + name: "Case 03: Nested Recover[third character is upper case char]", + entrypoint: "case03", + input: "11A3", + want: want{ + captures: "11>3", + errors: []string{ + "1:3 (2): rule ErrAlphaOuter03: expecting a number, got upper case char", + }, + }, + }, + { + name: "Case 03: Nested Recover[fourth character is non-char]", + entrypoint: "case03", + input: "111+3", + want: want{ + captures: "111?3", + errors: []string{ + "1:4 (3): rule ErrOtherOuter03: expecting a number, got a non-char", + }, + }, + }, + + // Case 04: Nested Recover, which fails in inner recover + { + name: "Case 04: Nested Recover, which fails in inner recover[correct]", + entrypoint: "case04", + input: "123", + want: want{captures: "123"}, + }, + { + name: "Case 04: Nested Recover, which fails in inner recover[second character is lower case char]", + entrypoint: "case04", + input: "1a3", + want: want{ + captures: "1x3", + errors: []string{ + "1:2 (1): rule ErrAlphaOuter04: expecting a number, got a char", + }, + }, + }, + { + name: "Case 04: Nested Recover, which fails in inner recover[third character is upper case char]", + entrypoint: "case04", + input: "11A3", + want: want{ + captures: "11x3", + errors: []string{ + "1:3 (2): rule ErrAlphaOuter04: expecting a number, got a char", + }, + }, + }, + { + name: "Case 04: Nested Recover, which fails in inner recover[fourth 
character is non-char]", + entrypoint: "case04", + input: "111+3", + want: want{ + captures: "111?3", + errors: []string{ + "1:4 (3): rule ErrOtherOuter04: expecting a number, got a non-char", + }, + }, + }, + } + for _, testCase := range cases { + testCase := testCase + + setOptions := map[string][]leftrecursionthrownrecover.Option{ + "memoize": { + leftrecursionthrownrecover.Memoize(true), + leftrecursionthrownrecover.Entrypoint(testCase.entrypoint), + }, + "-": { + leftrecursionthrownrecover.Entrypoint(testCase.entrypoint), + }, + } + for nameOptions, options := range setOptions { + options := options + + t.Run(testCase.name+". Options: "+nameOptions, func(t *testing.T) { + t.Parallel() + + got, err := leftrecursionthrownrecover.Parse( + "", []byte(testCase.input), options...) + if testCase.want.errors == nil && err != nil { + t.Fatalf( + "for input %q got error: %s, but expect to parse without errors", + testCase.input, err) + } + if testCase.want.errors != nil && err == nil { + t.Fatalf( + "for input %q got no error, but expect to parse with errors: %s", + testCase.input, testCase.want.errors) + } + if !reflect.DeepEqual(got, testCase.want.captures) { + t.Errorf( + "for input %q want %s, got %s", + testCase.input, testCase.want.captures, got) + } + if err != nil { + var errorLister leftrecursionthrownrecover.ErrorLister + if !errors.As(err, &errorLister) { + t.FailNow() + } + list := errorLister.Errors() + if len(list) != len(testCase.want.errors) { + t.Errorf( + "for input %q want %d error(s), got %d", + testCase.input, len(testCase.want.errors), len(list)) + t.Logf("expected errors:\n") + for _, ee := range testCase.want.errors { + t.Logf("- %s\n", ee) + } + t.Logf("got errors:\n") + for _, ee := range list { + t.Logf("- %s\n", ee) + } + t.FailNow() + } + for index, err := range list { + var parserError leftrecursionthrownrecover.ParserError + if !errors.As(err, &parserError) { + t.FailNow() + } + if parserError.Error() != testCase.want.errors[index] { + 
t.Errorf( + "for input %q want %dth error to be %s, got %s", + testCase.input, index+1, + testCase.want.errors[index], parserError) + } + } + } + }) + } + } +} diff --git a/test/linear/linear.go b/test/linear/linear.go index db6940e..4c11c27 100644 --- a/test/linear/linear.go +++ b/test/linear/linear.go @@ -775,14 +775,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -984,7 +989,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1025,37 +1030,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", 
string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1067,6 +1087,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1114,9 +1144,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1126,7 +1153,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1140,7 +1167,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1169,7 +1196,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1287,7 +1314,7 @@ func (p 
*parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1305,7 +1332,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1361,7 +1388,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1379,7 +1406,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1398,7 +1425,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1418,7 +1445,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1431,7 +1458,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1461,7 +1488,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := 
p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1479,7 +1506,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1494,7 +1521,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/max_expr_cnt/maxexpr.go b/test/max_expr_cnt/maxexpr.go index 5f337e4..29d7938 100644 --- a/test/max_expr_cnt/maxexpr.go +++ b/test/max_expr_cnt/maxexpr.go @@ -20,11 +20,53 @@ import ( var g = &grammar{ rules: []*rule{ { - name: "infinite_rule", + name: "long_rule1", pos: position{line: 6, col: 1, offset: 53}, expr: &ruleRefExpr{ - pos: position{line: 6, col: 17, offset: 69}, - name: "infinite_rule", + pos: position{line: 6, col: 14, offset: 66}, + name: "long_rule2", + }, + }, + { + name: "long_rule2", + pos: position{line: 7, col: 1, offset: 77}, + expr: &ruleRefExpr{ + pos: position{line: 7, col: 14, offset: 90}, + name: "long_rule3", + }, + }, + { + name: "long_rule3", + pos: position{line: 8, col: 1, offset: 101}, + expr: &ruleRefExpr{ + pos: position{line: 8, col: 14, offset: 114}, + name: "long_rule4", + }, + }, + { + name: "long_rule4", + pos: position{line: 9, col: 1, offset: 125}, + expr: &ruleRefExpr{ + pos: position{line: 9, col: 14, offset: 138}, + name: "long_rule5", + }, + }, + { + name: "long_rule5", + pos: position{line: 10, col: 1, offset: 149}, + expr: &ruleRefExpr{ + pos: position{line: 10, col: 14, offset: 162}, + name: "long_rule6", + }, + }, + { + name: "long_rule6", + pos: position{line: 11, col: 1, offset: 173}, + expr: &litMatcher{ + pos: position{line: 11, col: 14, offset: 186}, + val: " ", + ignoreCase: false, + want: "\" \"", }, }, }, @@ -610,14 
+652,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -819,7 +866,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -860,37 +907,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", 
p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -902,6 +964,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -949,9 +1021,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -961,7 +1030,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -975,7 +1044,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1004,7 +1073,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1122,7 +1191,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1140,7 +1209,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + 
val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1196,7 +1265,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1214,7 +1283,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1233,7 +1302,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1253,7 +1322,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1266,7 +1335,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1296,7 +1365,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1314,7 +1383,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1329,7 
+1398,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/max_expr_cnt/maxexpr.peg b/test/max_expr_cnt/maxexpr.peg index 804d250..49170b4 100644 --- a/test/max_expr_cnt/maxexpr.peg +++ b/test/max_expr_cnt/maxexpr.peg @@ -3,4 +3,9 @@ package maxexprcnt } // trigger an infinite parse -infinite_rule = infinite_rule \ No newline at end of file +long_rule1 = long_rule2 +long_rule2 = long_rule3 +long_rule3 = long_rule4 +long_rule4 = long_rule5 +long_rule5 = long_rule6 +long_rule6 = " " diff --git a/test/predicates/predicates.go b/test/predicates/predicates.go index 02e18cc..093fafd 100644 --- a/test/predicates/predicates.go +++ b/test/predicates/predicates.go @@ -829,14 +829,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1038,7 +1043,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1079,37 +1084,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + 
startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1121,6 +1141,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1168,9 +1198,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1180,7 +1207,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = 
p.sliceFrom(start) @@ -1194,7 +1221,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1223,7 +1250,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1341,7 +1368,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1359,7 +1386,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1415,7 +1442,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1433,7 +1460,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1452,7 +1479,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1472,7 +1499,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return 
p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1485,7 +1512,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1515,7 +1542,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1533,7 +1560,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1548,7 +1575,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/runeerror/runeerror.go b/test/runeerror/runeerror.go index 9018261..1186364 100644 --- a/test/runeerror/runeerror.go +++ b/test/runeerror/runeerror.go @@ -674,14 +674,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -883,7 +888,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = 
p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -924,37 +929,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -966,6 +986,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1013,9 +1043,6 @@ func (p 
*parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1025,7 +1052,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1039,7 +1066,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1068,7 +1095,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1186,7 +1213,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1204,7 +1231,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1260,7 +1287,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1278,7 +1305,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1297,7 +1324,7 @@ func (p *parser) 
parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1317,7 +1344,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1330,7 +1357,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1360,7 +1387,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1378,7 +1405,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1393,7 +1420,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/state/state.go b/test/state/state.go index 99e860e..b0ae391 100644 --- a/test/state/state.go +++ b/test/state/state.go @@ -757,14 +757,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return 
p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -966,7 +971,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1007,37 +1012,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt 
savepoint if p.memoize { @@ -1049,6 +1069,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1096,9 +1126,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1108,7 +1135,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1122,7 +1149,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1151,7 +1178,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1269,7 +1296,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1287,7 +1314,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1343,7 +1370,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok 
:= p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1361,7 +1388,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1380,7 +1407,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1400,7 +1427,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1413,7 +1440,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1443,7 +1470,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1461,7 +1488,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1476,7 +1503,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git 
a/test/stateclone/stateclone.go b/test/stateclone/stateclone.go index 23310b2..02171b6 100644 --- a/test/stateclone/stateclone.go +++ b/test/stateclone/stateclone.go @@ -847,14 +847,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1056,7 +1061,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1097,37 +1102,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() 
- val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1139,6 +1159,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1186,9 +1216,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1198,7 +1225,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1212,7 +1239,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1241,7 +1268,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1359,7 +1386,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) 
p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1377,7 +1404,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1433,7 +1460,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1451,7 +1478,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1470,7 +1497,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1490,7 +1517,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1503,7 +1530,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1533,7 +1560,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1551,7 +1578,7 @@ func (p *parser) 
parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1566,7 +1593,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/statereadonly/statereadonly.go b/test/statereadonly/statereadonly.go index 0707428..137cd3a 100644 --- a/test/statereadonly/statereadonly.go +++ b/test/statereadonly/statereadonly.go @@ -828,14 +828,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1037,7 +1042,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1078,37 +1083,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer 
p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1120,6 +1140,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1167,9 +1197,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1179,7 +1206,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1193,7 +1220,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + 
p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1222,7 +1249,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1340,7 +1367,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1358,7 +1385,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1414,7 +1441,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1432,7 +1459,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1451,7 +1478,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1471,7 +1498,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1484,7 +1511,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := 
range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1514,7 +1541,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1532,7 +1559,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1547,7 +1574,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/staterestore/optimized/staterestore.go b/test/staterestore/optimized/staterestore.go index 71e26e7..dca9c59 100644 --- a/test/staterestore/optimized/staterestore.go +++ b/test/staterestore/optimized/staterestore.go @@ -1350,7 +1350,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1391,18 +1391,34 @@ func listJoin(list []string, sep string, lastSep string) string { } } +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { + var ( + val any + ok bool + ) + + val, ok = p.parseRule(rule) + + return val, ok +} + func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] return val, ok } +func (p *parser) parseExprWrap(expr any) (any, bool) { + val, ok := 
p.parseExpr(expr) + + return val, ok +} + // nolint: gocyclo func (p *parser) parseExpr(expr any) (any, bool) { - p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1455,7 +1471,7 @@ func (p *parser) parseExpr(expr any) (any, bool) { func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1487,7 +1503,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1571,6 +1587,7 @@ func (p *parser) parseCharClassMatcher(chr *charClassMatcher) (any, bool) { } func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { + for altI, alt := range ch.alternatives { // dummy assignment to prevent compile error if optimized _ = altI @@ -1578,7 +1595,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { return val, ok @@ -1590,7 +1607,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1634,7 +1651,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1648,7 +1665,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := 
p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1664,7 +1681,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1680,7 +1697,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1689,7 +1706,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1712,7 +1729,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1726,7 +1743,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1737,7 +1754,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/staterestore/standard/staterestore.go b/test/staterestore/standard/staterestore.go index b490e8b..300a4ef 100644 --- a/test/staterestore/standard/staterestore.go +++ 
b/test/staterestore/standard/staterestore.go @@ -927,14 +927,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1136,7 +1141,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1177,37 +1182,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = 
p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1219,6 +1239,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1266,9 +1296,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1278,7 +1305,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1292,7 +1319,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1321,7 +1348,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1439,7 +1466,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1457,7 +1484,7 @@ func (p *parser) 
parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1513,7 +1540,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1531,7 +1558,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1550,7 +1577,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1570,7 +1597,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1583,7 +1610,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -1613,7 +1640,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1631,7 +1658,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := 
p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1646,7 +1673,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/staterestore/staterestore.go b/test/staterestore/staterestore.go index b490e8b..300a4ef 100644 --- a/test/staterestore/staterestore.go +++ b/test/staterestore/staterestore.go @@ -927,14 +927,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1136,7 +1141,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1177,37 +1182,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { +func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, 
ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -1219,6 +1239,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -1266,9 +1296,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -1278,7 +1305,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -1292,7 +1319,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -1321,7 +1348,7 @@ func (p *parser) 
parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -1439,7 +1466,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -1457,7 +1484,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -1513,7 +1540,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -1531,7 +1558,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -1550,7 +1577,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -1570,7 +1597,7 @@ func (p *parser) parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -1583,7 +1610,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { 
p.restoreState(state) p.restore(pt) @@ -1613,7 +1640,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -1631,7 +1658,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -1646,7 +1673,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/test/thrownrecover/thrownrecover.go b/test/thrownrecover/thrownrecover.go index eb59398..faf0788 100644 --- a/test/thrownrecover/thrownrecover.go +++ b/test/thrownrecover/thrownrecover.go @@ -1709,14 +1709,19 @@ func (p *parser) print(prefix, s string) string { return s } +func (p *parser) printIndent(mark string, s string) string { + return p.print(strings.Repeat(" ", p.depth)+mark, s) +} + func (p *parser) in(s string) string { + res := p.printIndent(">", s) p.depth++ - return p.print(strings.Repeat(" ", p.depth)+">", s) + return res } func (p *parser) out(s string) string { p.depth-- - return p.print(strings.Repeat(" ", p.depth)+"<", s) + return p.printIndent("<", s) } func (p *parser) addErr(err error) { @@ -1918,7 +1923,7 @@ func (p *parser) parse(g *grammar) (val any, err error) { } p.read() // advance to first rune - val, ok = p.parseRule(startRule) + val, ok = p.parseRuleWrap(startRule) if !ok { if len(*p.errs) == 0 { // If parsing fails, but no errors have been recorded, the expected values @@ -1959,37 +1964,52 @@ func listJoin(list []string, sep string, lastSep string) string { } } -func (p *parser) parseRule(rule *rule) (any, bool) { 
+func (p *parser) parseRuleMemoize(rule *rule) (any, bool) { + res, ok := p.getMemoized(rule) + if ok { + p.restore(res.end) + return res.v, res.b + } + + startMark := p.pt + val, ok := p.parseRule(rule) + p.setMemoized(startMark, rule, resultTuple{val, ok, p.pt}) + + return val, ok +} + +func (p *parser) parseRuleWrap(rule *rule) (any, bool) { if p.debug { defer p.out(p.in("parseRule " + rule.name)) } + var ( + val any + ok bool + startMark = p.pt + ) if p.memoize { - res, ok := p.getMemoized(rule) - if ok { - p.restore(res.end) - return res.v, res.b - } + val, ok = p.parseRuleMemoize(rule) + } else { + val, ok = p.parseRule(rule) } - start := p.pt + if ok && p.debug { + p.printIndent("MATCH", string(p.sliceFrom(startMark))) + } + return val, ok +} + +func (p *parser) parseRule(rule *rule) (any, bool) { p.rstack = append(p.rstack, rule) p.pushV() - val, ok := p.parseExpr(rule.expr) + val, ok := p.parseExprWrap(rule.expr) p.popV() p.rstack = p.rstack[:len(p.rstack)-1] - if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) - } - - if p.memoize { - p.setMemoized(start, rule, resultTuple{val, ok, p.pt}) - } return val, ok } -// nolint: gocyclo -func (p *parser) parseExpr(expr any) (any, bool) { +func (p *parser) parseExprWrap(expr any) (any, bool) { var pt savepoint if p.memoize { @@ -2001,6 +2021,16 @@ func (p *parser) parseExpr(expr any) (any, bool) { pt = p.pt } + val, ok := p.parseExpr(expr) + + if p.memoize { + p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) + } + return val, ok +} + +// nolint: gocyclo +func (p *parser) parseExpr(expr any) (any, bool) { p.ExprCnt++ if p.ExprCnt > p.maxExprCnt { panic(errMaxExprCnt) @@ -2048,9 +2078,6 @@ func (p *parser) parseExpr(expr any) (any, bool) { default: panic(fmt.Sprintf("unknown expression type %T", expr)) } - if p.memoize { - p.setMemoized(pt, expr, resultTuple{val, ok, p.pt}) - } return val, ok } @@ -2060,7 +2087,7 @@ func (p *parser) parseActionExpr(act *actionExpr) 
(any, bool) { } start := p.pt - val, ok := p.parseExpr(act.expr) + val, ok := p.parseExprWrap(act.expr) if ok { p.cur.pos = start.position p.cur.text = p.sliceFrom(start) @@ -2074,7 +2101,7 @@ func (p *parser) parseActionExpr(act *actionExpr) (any, bool) { val = actVal } if ok && p.debug { - p.print(strings.Repeat(" ", p.depth)+"MATCH", string(p.sliceFrom(start))) + p.printIndent("MATCH", string(p.sliceFrom(start))) } return val, ok } @@ -2103,7 +2130,7 @@ func (p *parser) parseAndExpr(and *andExpr) (any, bool) { pt := p.pt state := p.cloneState() p.pushV() - _, ok := p.parseExpr(and.expr) + _, ok := p.parseExprWrap(and.expr) p.popV() p.restoreState(state) p.restore(pt) @@ -2221,7 +2248,7 @@ func (p *parser) parseChoiceExpr(ch *choiceExpr) (any, bool) { state := p.cloneState() p.pushV() - val, ok := p.parseExpr(alt) + val, ok := p.parseExprWrap(alt) p.popV() if ok { p.incChoiceAltCnt(ch, altI) @@ -2239,7 +2266,7 @@ func (p *parser) parseLabeledExpr(lab *labeledExpr) (any, bool) { } p.pushV() - val, ok := p.parseExpr(lab.expr) + val, ok := p.parseExprWrap(lab.expr) p.popV() if ok && lab.label != "" { m := p.vstack[len(p.vstack)-1] @@ -2295,7 +2322,7 @@ func (p *parser) parseNotExpr(not *notExpr) (any, bool) { state := p.cloneState() p.pushV() p.maxFailInvertExpected = !p.maxFailInvertExpected - _, ok := p.parseExpr(not.expr) + _, ok := p.parseExprWrap(not.expr) p.maxFailInvertExpected = !p.maxFailInvertExpected p.popV() p.restoreState(state) @@ -2313,7 +2340,7 @@ func (p *parser) parseOneOrMoreExpr(expr *oneOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { if len(vals) == 0 { @@ -2332,7 +2359,7 @@ func (p *parser) parseRecoveryExpr(recover *recoveryExpr) (any, bool) { } p.pushRecovery(recover.failureLabel, recover.recoverExpr) - val, ok := p.parseExpr(recover.expr) + val, ok := p.parseExprWrap(recover.expr) p.popRecovery() return val, ok @@ -2352,7 +2379,7 @@ func (p *parser) 
parseRuleRefExpr(ref *ruleRefExpr) (any, bool) { p.addErr(fmt.Errorf("undefined rule: %s", ref.name)) return nil, false } - return p.parseRule(rule) + return p.parseRuleWrap(rule) } func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { @@ -2365,7 +2392,7 @@ func (p *parser) parseSeqExpr(seq *seqExpr) (any, bool) { pt := p.pt state := p.cloneState() for _, expr := range seq.exprs { - val, ok := p.parseExpr(expr) + val, ok := p.parseExprWrap(expr) if !ok { p.restoreState(state) p.restore(pt) @@ -2395,7 +2422,7 @@ func (p *parser) parseThrowExpr(expr *throwExpr) (any, bool) { for i := len(p.recoveryStack) - 1; i >= 0; i-- { if recoverExpr, ok := p.recoveryStack[i][expr.label]; ok { - if val, ok := p.parseExpr(recoverExpr); ok { + if val, ok := p.parseExprWrap(recoverExpr); ok { return val, ok } } @@ -2413,7 +2440,7 @@ func (p *parser) parseZeroOrMoreExpr(expr *zeroOrMoreExpr) (any, bool) { for { p.pushV() - val, ok := p.parseExpr(expr.expr) + val, ok := p.parseExprWrap(expr.expr) p.popV() if !ok { return vals, true @@ -2428,7 +2455,7 @@ func (p *parser) parseZeroOrOneExpr(expr *zeroOrOneExpr) (any, bool) { } p.pushV() - val, _ := p.parseExpr(expr.expr) + val, _ := p.parseExprWrap(expr.expr) p.popV() // whether it matched or not, consider it a match return val, true diff --git a/testutils/testutils.go b/testutils/testutils.go new file mode 100644 index 0000000..9a6c3b7 --- /dev/null +++ b/testutils/testutils.go @@ -0,0 +1,164 @@ +// Copied from https://github.com/stretchr/testify + +// Copyright (c) 2012-2020 Mat Ryer, Tyler Bunnell and contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. + +package testutils + +import ( + "bytes" + "errors" + "reflect" +) + +// IsEmpty gets whether the specified object is considered empty or not. 
+func IsEmpty(object interface{}) bool { + // get nil case out of the way + if object == nil { + return true + } + + objValue := reflect.ValueOf(object) + + switch objValue.Kind() { + // collection types are empty when they have no element + case reflect.Chan, reflect.Map, reflect.Slice: + return objValue.Len() == 0 + // pointers are empty if nil or if the value they point to is empty + case reflect.Ptr: + if objValue.IsNil() { + return true + } + deref := objValue.Elem().Interface() + return IsEmpty(deref) + // for all other types, compare against the zero value + // array types are empty when they match their zero-initialized state + default: + zero := reflect.Zero(objValue.Type()) + return reflect.DeepEqual(object, zero.Interface()) + } +} + +// IsList checks that the provided value is array or slice. +func IsList(list interface{}) (ok bool) { + kind := reflect.TypeOf(list).Kind() + return kind == reflect.Array || kind == reflect.Slice +} + +// DiffLists diffs two arrays/slices and returns slices of elements that are only in A and only in B. +// If some element is present multiple times, each instance is counted separately (e.g. if something is 2x in A and +// 5x in B, it will be 0x in extraA and 3x in extraB). The order of items in both lists is ignored. 
+func DiffLists(listA, listB interface{}) (extraA, extraB []interface{}) { + aValue := reflect.ValueOf(listA) + bValue := reflect.ValueOf(listB) + + aLen := aValue.Len() + bLen := bValue.Len() + + // Mark indexes in bValue that we already used + visited := make([]bool, bLen) + for i := 0; i < aLen; i++ { + element := aValue.Index(i).Interface() + found := false + for j := 0; j < bLen; j++ { + if visited[j] { + continue + } + if ObjectsAreEqual(bValue.Index(j).Interface(), element) { + visited[j] = true + found = true + break + } + } + if !found { + extraA = append(extraA, element) + } + } + + for j := 0; j < bLen; j++ { + if visited[j] { + continue + } + extraB = append(extraB, bValue.Index(j).Interface()) + } + + return +} + +// ObjectsAreEqual determines if two objects are considered equal. +// +// This function does no assertion of any kind. +func ObjectsAreEqual(expected, actual interface{}) bool { + if expected == nil || actual == nil { + return expected == actual + } + + exp, ok := expected.([]byte) + if !ok { + return reflect.DeepEqual(expected, actual) + } + + act, ok := actual.([]byte) + if !ok { + return false + } + if exp == nil || act == nil { + return exp == nil && act == nil + } + return bytes.Equal(exp, act) +} + +// ElementsMatch asserts that the specified listA(array, slice...) is equal to specified +// listB(array, slice...) ignoring the order of the elements. If there are duplicate elements, +// the number of appearances of each of them in both lists should match. +// +// ElementsMatch([1, 3, 2, 3], [1, 3, 3, 2]). 
+func ElementsMatch(listA interface{}, listB interface{}) bool { + if IsEmpty(listA) && IsEmpty(listB) { + return true + } + + if !IsList(listA) || !IsList(listB) { + return false + } + + extraA, extraB := DiffLists(listA, listB) + + return len(extraA) == 0 && len(extraB) == 0 +} + +func isFunction(arg interface{}) bool { + if arg == nil { + return false + } + return reflect.TypeOf(arg).Kind() == reflect.Func +} + +// ValidateEqualArgs checks whether provided arguments can be safely used in the +// Equal/NotEqual functions. +func ValidateEqualArgs(expected, actual interface{}) error { + if expected == nil && actual == nil { + return nil + } + + if isFunction(expected) || isFunction(actual) { + return errors.New("cannot take func type as argument") + } + return nil +} + +// Equal asserts that two objects are equal. +// +// Equal(123, 123) +// +// Pointer variable equality is determined based on the equality of the +// referenced values (as opposed to the memory addresses). Function equality +// cannot be determined and will always fail. +func Equal(expected, actual interface{}, msgAndArgs ...interface{}) bool { + if err := ValidateEqualArgs(expected, actual); err != nil { + return false + } + + return ObjectsAreEqual(expected, actual) +} diff --git a/testutils/testutils_test.go b/testutils/testutils_test.go new file mode 100644 index 0000000..5a41b1e --- /dev/null +++ b/testutils/testutils_test.go @@ -0,0 +1,303 @@ +// Copied from https://github.com/stretchr/testify + +// Copyright (c) 2012-2020 Mat Ryer, Tyler Bunnell and contributors. All rights reserved. +// Use of this source code is governed by an MIT-style license that can be found in +// the THIRD-PARTY-NOTICES file. 
+ +package testutils_test + +import ( + "errors" + "fmt" + "testing" + "time" + + "github.com/mna/pigeon/testutils" +) + +func TestIsEmpty(t *testing.T) { + t.Parallel() + + chWithValue := make(chan struct{}, 1) + chWithValue <- struct{}{} + + tests := []struct { + obj interface{} + want bool + }{ + {obj: "", want: true}, + {obj: nil, want: true}, + {obj: []string{}, want: true}, + {obj: 0, want: true}, + {obj: int32(0), want: true}, + {obj: int64(0), want: true}, + {obj: false, want: true}, + {obj: map[string]string{}, want: true}, + {obj: new(time.Time), want: true}, + {obj: time.Time{}, want: true}, + {obj: make(chan struct{}), want: true}, + {obj: [1]int{}, want: true}, + {obj: "something", want: false}, + {obj: errors.New("something"), want: false}, + {obj: []string{"something"}, want: false}, + {obj: 1, want: false}, + {obj: true, want: false}, + {obj: map[string]string{"Hello": "World"}, want: false}, + {obj: chWithValue, want: false}, + {obj: [1]int{42}, want: false}, + } + for _, test := range tests { + test := test + t.Run(fmt.Sprintf("IsEmpty(%#v)", test.obj), func(t *testing.T) { + t.Parallel() + + isEmpty := testutils.IsEmpty(test.obj) + if isEmpty != test.want { + t.Fatalf("IsEmpty(%#v) should return %v", test.obj, test.want) + } + }) + } +} + +func TestVlidateEqualArgs(t *testing.T) { + t.Parallel() + + if testutils.ValidateEqualArgs(func() {}, func() {}) == nil { + t.Error("non-nil functions should error") + } + + if testutils.ValidateEqualArgs(func() {}, func() {}) == nil { + t.Error("non-nil functions should error") + } + + if testutils.ValidateEqualArgs(nil, nil) != nil { + t.Error("nil functions are equal") + } +} + +func TestEqual(t *testing.T) { + t.Parallel() + + type myType string + + var m map[string]interface{} + + tests := []struct { + expected interface{} + actual interface{} + result bool + remark string + }{ + {"Hello World", "Hello World", true, ""}, + {123, 123, true, ""}, + {123.5, 123.5, true, ""}, + {[]byte("Hello World"), 
[]byte("Hello World"), true, ""}, + {nil, nil, true, ""}, + {int32(123), int32(123), true, ""}, + {uint64(123), uint64(123), true, ""}, + {myType("1"), myType("1"), true, ""}, + {&struct{}{}, &struct{}{}, true, "pointer equality is based on equality of underlying value"}, + + // Not expected to be equal + {m["bar"], "something", false, ""}, + {myType("1"), myType("2"), false, ""}, + + // A case that might be confusing, especially with numeric literals + {10, uint(10), false, ""}, + } + + for _, test := range tests { + test := test + t.Run(fmt.Sprintf("Equal(%#v, %#v)", test.expected, test.actual), func(t *testing.T) { + t.Parallel() + + res := testutils.Equal(test.expected, test.actual) + if res != test.result { + t.Errorf( + "Equal(%#v, %#v) should return %#v: %s", + test.expected, test.actual, test.result, test.remark) + } + }) + } +} + +func TestDiffLists(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + listA interface{} + listB interface{} + extraA []interface{} + extraB []interface{} + }{ + { + name: "equal empty", + listA: []string{}, + listB: []string{}, + extraA: nil, + extraB: nil, + }, + { + name: "equal same order", + listA: []string{"hello", "world"}, + listB: []string{"hello", "world"}, + extraA: nil, + extraB: nil, + }, + { + name: "equal different order", + listA: []string{"hello", "world"}, + listB: []string{"world", "hello"}, + extraA: nil, + extraB: nil, + }, + { + name: "extra A", + listA: []string{"hello", "hello", "world"}, + listB: []string{"hello", "world"}, + extraA: []interface{}{"hello"}, + extraB: nil, + }, + { + name: "extra A twice", + listA: []string{"hello", "hello", "hello", "world"}, + listB: []string{"hello", "world"}, + extraA: []interface{}{"hello", "hello"}, + extraB: nil, + }, + { + name: "extra B", + listA: []string{"hello", "world"}, + listB: []string{"hello", "hello", "world"}, + extraA: nil, + extraB: []interface{}{"hello"}, + }, + { + name: "extra B twice", + listA: []string{"hello", "world"}, + 
listB: []string{"hello", "hello", "world", "hello"}, + extraA: nil, + extraB: []interface{}{"hello", "hello"}, + }, + { + name: "integers 1", + listA: []int{1, 2, 3, 4, 5}, + listB: []int{5, 4, 3, 2, 1}, + extraA: nil, + extraB: nil, + }, + { + name: "integers 2", + listA: []int{1, 2, 1, 2, 1}, + listB: []int{2, 1, 2, 1, 2}, + extraA: []interface{}{1}, + extraB: []interface{}{2}, + }, + } + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + actualExtraA, actualExtraB := testutils.DiffLists( + test.listA, test.listB) + if !testutils.Equal(test.extraA, actualExtraA) { + t.Errorf( + "extra A does not match for listA=%v listB=%v", + test.listA, test.listB) + } + if !testutils.Equal(test.extraB, actualExtraB) { + t.Errorf( + "extra B does not match for listA=%v listB=%v", + test.listA, test.listB) + } + }) + } +} + +func TestObjectsAreEqual(t *testing.T) { + t.Parallel() + + cases := []struct { + expected interface{} + actual interface{} + result bool + }{ + // cases that are expected to be equal + {"Hello World", "Hello World", true}, + {123, 123, true}, + {123.5, 123.5, true}, + {[]byte("Hello World"), []byte("Hello World"), true}, + {nil, nil, true}, + + // cases that are expected not to be equal + {map[int]int{5: 10}, map[int]int{10: 20}, false}, + {'x', "x", false}, + {"x", 'x', false}, + {0, 0.1, false}, + {0.1, 0, false}, + {time.Now, time.Now, false}, + {func() {}, func() {}, false}, + {uint32(10), int32(10), false}, + } + + for _, test := range cases { + test := test + t.Run(fmt.Sprintf("ObjectsAreEqual(%#v, %#v)", test.expected, test.actual), func(t *testing.T) { + t.Parallel() + + res := testutils.ObjectsAreEqual(test.expected, test.actual) + if res != test.result { + t.Errorf( + "ObjectsAreEqual(%#v, %#v) should return %#v", + test.expected, test.actual, test.result) + } + }) + } +} + +func TestElementsMatch(t *testing.T) { + t.Parallel() + + tests := []struct { + expected interface{} + actual interface{} + 
result bool + }{ + // matching + {nil, nil, true}, + + {nil, nil, true}, + {[]int{}, []int{}, true}, + {[]int{1}, []int{1}, true}, + {[]int{1, 1}, []int{1, 1}, true}, + {[]int{1, 2}, []int{1, 2}, true}, + {[]int{1, 2}, []int{2, 1}, true}, + {[2]int{1, 2}, [2]int{2, 1}, true}, + {[]string{"hello", "world"}, []string{"world", "hello"}, true}, + {[]string{"hello", "hello"}, []string{"hello", "hello"}, true}, + {[]string{"hello", "hello", "world"}, []string{"hello", "world", "hello"}, true}, + {[3]string{"hello", "hello", "world"}, [3]string{"hello", "world", "hello"}, true}, + {[]int{}, nil, true}, + + // not matching + {[]int{1}, []int{1, 1}, false}, + {[]int{1, 2}, []int{2, 2}, false}, + {[]string{"hello", "hello"}, []string{"hello"}, false}, + } + + for _, test := range tests { + test := test + t.Run(fmt.Sprintf("ElementsMatch(%#v, %#v)", test.expected, test.actual), func(t *testing.T) { + t.Parallel() + + res := testutils.ElementsMatch(test.actual, test.expected) + if res != test.result { + t.Errorf( + "ElementsMatch(%#v, %#v) should return %v", + test.actual, test.expected, test.result) + } + }) + } +}