From 5e587cf5e609ce77ab625398fe2932b8a1267b1c Mon Sep 17 00:00:00 2001 From: "k.molodyakov" Date: Tue, 13 Jun 2023 11:47:27 +0300 Subject: [PATCH 1/2] Add left recursion check --- Makefile | 4 +- ast/ast.go | 468 +++++++++++++++++++++++++++++++++ builder/builder.go | 26 +- builder/left_recursion.go | 134 ++++++++++ builder/left_recursion_test.go | 136 ++++++++++ builder/scc.go | 140 ++++++++++ builder/scc_test.go | 215 +++++++++++++++ go.mod | 6 + go.sum | 30 +++ main.go | 6 +- 10 files changed, 1157 insertions(+), 8 deletions(-) create mode 100644 builder/left_recursion.go create mode 100644 builder/left_recursion_test.go create mode 100644 builder/scc.go create mode 100644 builder/scc_test.go diff --git a/Makefile b/Makefile index ddf0f506..0b33214b 100644 --- a/Makefile +++ b/Makefile @@ -116,7 +116,7 @@ $(TEST_DIR)/goto_state/goto_state.go: $(TEST_DIR)/goto_state/goto_state.peg $(BI $(BINDIR)/pigeon -nolint $< > $@ $(TEST_DIR)/max_expr_cnt/maxexpr.go: $(TEST_DIR)/max_expr_cnt/maxexpr.peg $(BINDIR)/pigeon - $(BINDIR)/pigeon -nolint $< > $@ + $(BINDIR)/pigeon -nolint -ignore-left-recursion $< > $@ $(TEST_DIR)/labeled_failures/labeled_failures.go: $(TEST_DIR)/labeled_failures/labeled_failures.peg $(BINDIR)/pigeon $(BINDIR)/pigeon -nolint $< > $@ @@ -167,7 +167,7 @@ $(TEST_DIR)/issue_70/optimized-grammar/issue_70.go: $(TEST_DIR)/issue_70/issue_7 $(BINDIR)/pigeon -nolint -optimize-grammar $< > $@ $(TEST_DIR)/issue_70b/issue_70b.go: $(TEST_DIR)/issue_70b/issue_70b.peg $(BINDIR)/pigeon - $(BINDIR)/pigeon -nolint --optimize-grammar $< > $@ + $(BINDIR)/pigeon -nolint --optimize-grammar -ignore-left-recursion $< > $@ $(TEST_DIR)/issue_80/issue_80.go: $(TEST_DIR)/issue_80/issue_80.peg $(BINDIR)/pigeon $(BINDIR)/pigeon -nolint $< > $@ diff --git a/ast/ast.go b/ast/ast.go index c34b7d17..6302fcc0 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -35,6 +35,8 @@ type Grammar struct { Rules []*Rule } +var _ Expression = (*Grammar)(nil) + // NewGrammar creates a new grammar at the 
specified position. func NewGrammar(p Pos) *Grammar { return &Grammar{p: p} @@ -56,6 +58,21 @@ func (g *Grammar) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (g *Grammar) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Grammar") +} + +// IsNullable returns the nullable attribute of the node +func (g *Grammar) IsNullable() bool { + panic("IsNullable should not be called on the Grammar") +} + +// InitialNames returns names of nodes with which an expression can begin +func (g *Grammar) InitialNames() map[string]bool { + panic("InitialNames should not be called on the Grammar") +} + // Rule represents a rule in the PEG grammar. It has a name, an optional // display name to be used in error messages, and an expression. type Rule struct { @@ -63,8 +80,16 @@ type Rule struct { Name *Identifier DisplayName *StringLit Expr Expression + + // for work with left recursion + Visited bool + Nullable bool + LeftRecursive bool + Leader bool } +var _ Expression = (*Rule)(nil) + // NewRule creates a rule with at the specified position and with the // specified name as identifier. func NewRule(p Pos, name *Identifier) *Rule { @@ -80,9 +105,35 @@ func (r *Rule) String() string { r.p, r, r.Name, r.DisplayName, r.Expr) } +// NullableVisit computes the rule's Nullable flag on the first call and returns the cached flag afterwards. +func (r *Rule) NullableVisit(rules map[string]*Rule) bool { + if r.Visited { + // Finished rules report their cached result; a rule still being visited (left recursion) reads as non-nullable. + return r.Nullable + } + r.Visited = true + r.Nullable = r.Expr.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *Rule) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *Rule) InitialNames() map[string]bool { + return r.Expr.InitialNames() +} + // Expression is the interface implemented by all expression types. 
type Expression interface { Pos() Pos + + // for work with left recursion + NullableVisit(rules map[string]*Rule) bool + IsNullable() bool + InitialNames() map[string]bool } // ChoiceExpr is an ordered sequence of expressions. The parser tries to @@ -91,8 +142,12 @@ type Expression interface { type ChoiceExpr struct { p Pos Alternatives []Expression + + Nullable bool } +var _ Expression = (*ChoiceExpr)(nil) + // NewChoiceExpr creates a choice expression at the specified position. func NewChoiceExpr(p Pos) *ChoiceExpr { return &ChoiceExpr{p: p} @@ -113,6 +168,34 @@ func (c *ChoiceExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (c *ChoiceExpr) NullableVisit(rules map[string]*Rule) bool { + for _, alt := range c.Alternatives { + if alt.NullableVisit(rules) { + c.Nullable = true + return true + } + } + c.Nullable = false + return false +} + +// IsNullable returns the nullable attribute of the node +func (c *ChoiceExpr) IsNullable() bool { + return c.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *ChoiceExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for _, alt := range c.Alternatives { + for name := range alt.InitialNames() { + names[name] = true + } + } + return names +} + // FailureLabel is an identifier, which can by thrown and recovered in a grammar type FailureLabel string @@ -124,8 +207,12 @@ type RecoveryExpr struct { Expr Expression RecoverExpr Expression Labels []FailureLabel + + Nullable bool } +var _ Expression = (*RecoveryExpr)(nil) + // NewRecoveryExpr creates a choice expression at the specified position. 
func NewRecoveryExpr(p Pos) *RecoveryExpr { return &RecoveryExpr{p: p} @@ -147,6 +234,29 @@ func (r *RecoveryExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (r *RecoveryExpr) NullableVisit(rules map[string]*Rule) bool { + r.Nullable = r.Expr.NullableVisit(rules) || r.RecoverExpr.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *RecoveryExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *RecoveryExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for name := range r.Expr.InitialNames() { + names[name] = true + } + for name := range r.RecoverExpr.InitialNames() { + names[name] = true + } + return names +} + // ActionExpr is an expression that has an associated block of code to // execute when the expression matches. type ActionExpr struct { @@ -154,8 +264,12 @@ type ActionExpr struct { Expr Expression Code *CodeBlock FuncIx int + + Nullable bool } +var _ Expression = (*ActionExpr)(nil) + // NewActionExpr creates a new action expression at the specified position. 
func NewActionExpr(p Pos) *ActionExpr { return &ActionExpr{p: p} @@ -169,6 +283,26 @@ func (a *ActionExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v, Code: %v}", a.p, a, a.Expr, a.Code) } +// NullableVisit recursively determines whether an object is nullable +func (a *ActionExpr) NullableVisit(rules map[string]*Rule) bool { + a.Nullable = a.Expr.NullableVisit(rules) + return a.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (a *ActionExpr) IsNullable() bool { + return a.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *ActionExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for name := range a.Expr.InitialNames() { + names[name] = true + } + return names +} + // ThrowExpr is an expression that throws an FailureLabel to be catched by a // RecoveryChoiceExpr. type ThrowExpr struct { @@ -176,6 +310,8 @@ type ThrowExpr struct { Label string } +var _ Expression = (*ThrowExpr)(nil) + // NewThrowExpr creates a new throw expression at the specified position. func NewThrowExpr(p Pos) *ThrowExpr { return &ThrowExpr{p: p} @@ -189,13 +325,32 @@ func (t *ThrowExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v}", t.p, t, t.Label) } +// NullableVisit recursively determines whether an object is nullable +func (t *ThrowExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (t *ThrowExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (t *ThrowExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // SeqExpr is an ordered sequence of expressions, all of which must match // if the SeqExpr is to be a match itself. type SeqExpr struct { p Pos Exprs []Expression + + Nullable bool } +var _ Expression = (*SeqExpr)(nil) + // NewSeqExpr creates a new sequence expression at the specified position. 
func NewSeqExpr(p Pos) *SeqExpr { return &SeqExpr{p: p} @@ -216,6 +371,37 @@ func (s *SeqExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (s *SeqExpr) NullableVisit(rules map[string]*Rule) bool { + for _, item := range s.Exprs { + if !item.NullableVisit(rules) { + s.Nullable = false + return false + } + } + s.Nullable = true + return true +} + +// IsNullable returns the nullable attribute of the node +func (s *SeqExpr) IsNullable() bool { + return s.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *SeqExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for _, item := range s.Exprs { + for name := range item.InitialNames() { + names[name] = true + } + if !item.IsNullable() { + break + } + } + return names +} + // LabeledExpr is an expression that has an associated label. Code blocks // can access the value of the expression using that label, that becomes // a local variable in the code. @@ -225,6 +411,8 @@ type LabeledExpr struct { Expr Expression } +var _ Expression = (*LabeledExpr)(nil) + // NewLabeledExpr creates a new labeled expression at the specified position. 
func NewLabeledExpr(p Pos) *LabeledExpr { return &LabeledExpr{p: p} @@ -238,6 +426,21 @@ func (l *LabeledExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v, Expr: %v}", l.p, l, l.Label, l.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (l *LabeledExpr) NullableVisit(rules map[string]*Rule) bool { + return l.Expr.NullableVisit(rules) +} + +// IsNullable returns the nullable attribute of the node +func (l *LabeledExpr) IsNullable() bool { + return l.Expr.IsNullable() +} + +// InitialNames returns names of nodes with which an expression can begin +func (l *LabeledExpr) InitialNames() map[string]bool { + return l.Expr.InitialNames() +} + // AndExpr is a zero-length matcher that is considered a match if the // expression it contains is a match. type AndExpr struct { @@ -250,6 +453,8 @@ func NewAndExpr(p Pos) *AndExpr { return &AndExpr{p: p} } +var _ Expression = (*AndExpr)(nil) + // Pos returns the starting position of the node. func (a *AndExpr) Pos() Pos { return a.p } @@ -258,6 +463,21 @@ func (a *AndExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", a.p, a, a.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (a *AndExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (a *AndExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AndExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // NotExpr is a zero-length matcher that is considered a match if the // expression it contains is not a match. type NotExpr struct { @@ -265,6 +485,8 @@ type NotExpr struct { Expr Expression } +var _ Expression = (*NotExpr)(nil) + // NewNotExpr creates a new not (!) expression at the specified position. 
func NewNotExpr(p Pos) *NotExpr { return &NotExpr{p: p} @@ -278,12 +500,29 @@ func (n *NotExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", n.p, n, n.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (n *NotExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (n *NotExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (n *NotExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // ZeroOrOneExpr is an expression that can be matched zero or one time. type ZeroOrOneExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrOneExpr)(nil) + // NewZeroOrOneExpr creates a new zero or one expression at the specified // position. func NewZeroOrOneExpr(p Pos) *ZeroOrOneExpr { @@ -298,12 +537,29 @@ func (z *ZeroOrOneExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (z *ZeroOrOneExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (z *ZeroOrOneExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (z *ZeroOrOneExpr) InitialNames() map[string]bool { + return z.Expr.InitialNames() +} + // ZeroOrMoreExpr is an expression that can be matched zero or more times. type ZeroOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrMoreExpr)(nil) + // NewZeroOrMoreExpr creates a new zero or more expression at the specified // position. 
func NewZeroOrMoreExpr(p Pos) *ZeroOrMoreExpr { @@ -318,12 +574,29 @@ func (z *ZeroOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (z *ZeroOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (z *ZeroOrMoreExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (z *ZeroOrMoreExpr) InitialNames() map[string]bool { + return z.Expr.InitialNames() +} + // OneOrMoreExpr is an expression that can be matched one or more times. type OneOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*OneOrMoreExpr)(nil) + // NewOneOrMoreExpr creates a new one or more expression at the specified // position. func NewOneOrMoreExpr(p Pos) *OneOrMoreExpr { @@ -338,12 +611,31 @@ func (o *OneOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", o.p, o, o.Expr) } +// NullableVisit visits the repeated expression; one-or-more is nullable exactly when that expression is +func (o *OneOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return o.Expr.NullableVisit(rules) +} + +// IsNullable returns the nullable attribute of the repeated expression +func (o *OneOrMoreExpr) IsNullable() bool { + return o.Expr.IsNullable() +} + +// InitialNames returns names of nodes with which an expression can begin +func (o *OneOrMoreExpr) InitialNames() map[string]bool { + return o.Expr.InitialNames() +} + // RuleRefExpr is an expression that references a rule by name. type RuleRefExpr struct { p Pos Name *Identifier + + Nullable bool } +var _ Expression = (*RuleRefExpr)(nil) + // NewRuleRefExpr creates a new rule reference expression at the specified // position. 
func NewRuleRefExpr(p Pos) *RuleRefExpr { @@ -358,6 +650,28 @@ func (r *RuleRefExpr) String() string { return fmt.Sprintf("%s: %T{Name: %v}", r.p, r, r.Name) } +// NullableVisit recursively determines whether an object is nullable +func (r *RuleRefExpr) NullableVisit(rules map[string]*Rule) bool { + item, ok := rules[r.Name.Val] + if !ok { + // Token or unknown; never empty. + r.Nullable = false + return false + } + r.Nullable = item.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *RuleRefExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *RuleRefExpr) InitialNames() map[string]bool { + return map[string]bool{r.Name.Val: true} +} + // StateCodeExpr is an expression which can modify the internal state of the parser. type StateCodeExpr struct { p Pos @@ -365,6 +679,8 @@ type StateCodeExpr struct { FuncIx int } +var _ Expression = (*StateCodeExpr)(nil) + // NewStateCodeExpr creates a new state (#) code expression at the specified // position. func NewStateCodeExpr(p Pos) *StateCodeExpr { @@ -379,6 +695,21 @@ func (s *StateCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", s.p, s, s.Code) } +// NullableVisit recursively determines whether an object is nullable +func (s *StateCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (s *StateCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *StateCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // AndCodeExpr is a zero-length matcher that is considered a match if the // code block returns true. 
type AndCodeExpr struct { @@ -387,6 +718,8 @@ type AndCodeExpr struct { FuncIx int } +var _ Expression = (*AndCodeExpr)(nil) + // NewAndCodeExpr creates a new and (&) code expression at the specified // position. func NewAndCodeExpr(p Pos) *AndCodeExpr { @@ -401,6 +734,21 @@ func (a *AndCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", a.p, a, a.Code) } +// NullableVisit recursively determines whether an object is nullable +func (a *AndCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (a *AndCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AndCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // NotCodeExpr is a zero-length matcher that is considered a match if the // code block returns false. type NotCodeExpr struct { @@ -409,6 +757,8 @@ type NotCodeExpr struct { FuncIx int } +var _ Expression = (*NotCodeExpr)(nil) + // NewNotCodeExpr creates a new not (!) code expression at the specified // position. func NewNotCodeExpr(p Pos) *NotCodeExpr { @@ -423,6 +773,21 @@ func (n *NotCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", n.p, n, n.Code) } +// NullableVisit recursively determines whether an object is nullable +func (n *NotCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (n *NotCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (n *NotCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // LitMatcher is a string literal matcher. The value to match may be a // double-quoted string, a single-quoted single character, or a back-tick // quoted raw string. 
@@ -431,6 +796,8 @@ type LitMatcher struct { IgnoreCase bool } +var _ Expression = (*LitMatcher)(nil) + // NewLitMatcher creates a new literal matcher at the specified position and // with the specified value. func NewLitMatcher(p Pos, v string) *LitMatcher { @@ -445,6 +812,22 @@ func (l *LitMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q, IgnoreCase: %t}", l.p, l, l.Val, l.IgnoreCase) } +// NullableVisit recursively determines whether an object is nullable +func (l *LitMatcher) NullableVisit(rules map[string]*Rule) bool { + return l.IsNullable() +} + +// IsNullable returns the nullable attribute of the node +func (l *LitMatcher) IsNullable() bool { + // The string token '' is considered empty. + return len(l.Val) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin +func (l *LitMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // CharClassMatcher is a character class matcher. The value to match must // be one of the specified characters, in a range of characters, or in the // Unicode classes of characters. @@ -457,6 +840,8 @@ type CharClassMatcher struct { UnicodeClasses []string } +var _ Expression = (*CharClassMatcher)(nil) + // NewCharClassMatcher creates a new character class matcher at the specified // position and with the specified raw value. It parses the raw value into // the list of characters, ranges and Unicode classes. 
@@ -580,11 +965,28 @@ func (c *CharClassMatcher) String() string { c.p, c, c.Val, c.IgnoreCase, c.Inverted) } +// NullableVisit recursively determines whether an object is nullable +func (c *CharClassMatcher) NullableVisit(rules map[string]*Rule) bool { + return c.IsNullable() +} + +// IsNullable returns the nullable attribute of the node +func (c *CharClassMatcher) IsNullable() bool { + return len(c.Chars) == 0 && len(c.Ranges) == 0 && len(c.UnicodeClasses) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *CharClassMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // AnyMatcher is a matcher that matches any character except end-of-file. type AnyMatcher struct { posValue } +var _ Expression = (*AnyMatcher)(nil) + // NewAnyMatcher creates a new any matcher at the specified position. The // value is provided for completeness' sake, but it is always the dot. func NewAnyMatcher(p Pos, v string) *AnyMatcher { @@ -599,11 +1001,28 @@ func (a *AnyMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q}", a.p, a, a.Val) } +// NullableVisit recursively determines whether an object is nullable +func (a *AnyMatcher) NullableVisit(rules map[string]*Rule) bool { + return false +} + +// IsNullable returns the nullable attribute of the node +func (a *AnyMatcher) IsNullable() bool { + return false +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AnyMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // CodeBlock represents a code block. type CodeBlock struct { posValue } +var _ Expression = (*CodeBlock)(nil) + // NewCodeBlock creates a new code block at the specified position and with // the specified value. The value includes the outer braces. 
func NewCodeBlock(p Pos, code string) *CodeBlock { @@ -618,11 +1037,28 @@ func (c *CodeBlock) String() string { return fmt.Sprintf("%s: %T{Val: %q}", c.p, c, c.Val) } +// NullableVisit recursively determines whether an object is nullable +func (c *CodeBlock) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the CodeBlock") +} + +// IsNullable returns the nullable attribute of the node +func (c *CodeBlock) IsNullable() bool { + panic("IsNullable should not be called on the CodeBlock") +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *CodeBlock) InitialNames() map[string]bool { + panic("InitialNames should not be called on the CodeBlock") +} + // Identifier represents an identifier. type Identifier struct { posValue } +var _ Expression = (*Identifier)(nil) + // NewIdentifier creates a new identifier at the specified position and // with the specified name. func NewIdentifier(p Pos, name string) *Identifier { @@ -637,11 +1073,28 @@ func (i *Identifier) String() string { return fmt.Sprintf("%s: %T{Val: %q}", i.p, i, i.Val) } +// NullableVisit recursively determines whether an object is nullable +func (i *Identifier) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Identifier") +} + +// IsNullable returns the nullable attribute of the node +func (i *Identifier) IsNullable() bool { + panic("IsNullable should not be called on the Identifier") +} + +// InitialNames returns names of nodes with which an expression can begin +func (i *Identifier) InitialNames() map[string]bool { + panic("InitialNames should not be called on the Identifier") +} + // StringLit represents a string literal. type StringLit struct { posValue } +var _ Expression = (*StringLit)(nil) + // NewStringLit creates a new string literal at the specified position and // with the specified value. 
func NewStringLit(p Pos, val string) *StringLit { @@ -656,6 +1109,21 @@ func (s *StringLit) String() string { return fmt.Sprintf("%s: %T{Val: %q}", s.p, s, s.Val) } +// NullableVisit recursively determines whether an object is nullable +func (s *StringLit) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the StringLit") +} + +// IsNullable returns the nullable attribute of the node +func (s *StringLit) IsNullable() bool { + panic("IsNullable should not be called on the StringLit") +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *StringLit) InitialNames() map[string]bool { + panic("InitialNames should not be called on the StringLit") +} + type posValue struct { p Pos Val string diff --git a/builder/builder.go b/builder/builder.go index ee76666a..9d067327 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -77,6 +77,17 @@ func Optimize(optimize bool) Option { } } +// IgnoreLeftRecursion returns an option that sets ignoreLeftRecursion and, when applied, +// returns an option restoring the previous value. If ignoreLeftRecursion is true, errors +// associated with the use of left recursion rules are ignored. +func IgnoreLeftRecursion(ignore bool) Option { + return func(b *builder) Option { + prev := b.ignoreLeftRecursion + b.ignoreLeftRecursion = ignore + return IgnoreLeftRecursion(prev) + } +} + // Nolint returns an option that specifies the nolint option // If nolint is true, special '// nolint: ...' comments are added // to the generated parser to suppress warnings by gometalinter. 
@@ -118,6 +129,7 @@ type builder struct { basicLatinLookupTable bool globalState bool nolint bool + ignoreLeftRecursion bool ruleName string exprIndex int @@ -132,11 +144,15 @@ func (b *builder) setOptions(opts []Option) { } } -func (b *builder) buildParser(g *ast.Grammar) error { - b.writeInit(g.Init) - b.writeGrammar(g) - - for _, rule := range g.Rules { +func (b *builder) buildParser(grammar *ast.Grammar) error { + if err := PrepareGramma(grammar); err != nil { + if !b.ignoreLeftRecursion { + return fmt.Errorf("incorrect grammar: %w", err) + } + } + b.writeInit(grammar.Init) + b.writeGrammar(grammar) + for _, rule := range grammar.Rules { b.writeRuleCode(rule) } b.writeStaticCode() diff --git a/builder/left_recursion.go b/builder/left_recursion.go new file mode 100644 index 00000000..a93c8b26 --- /dev/null +++ b/builder/left_recursion.go @@ -0,0 +1,134 @@ +package builder + +import ( + "errors" + "fmt" + + "github.com/mna/pigeon/ast" +) + +var ( + // ErrNoLeader is returned when no rule of an SCC is part of every cycle. + ErrNoLeader = errors.New( + "SCC has no leadership candidate (no element is included in all cycles)") + // ErrHaveLeftRecirsion is returned when the grammar contains left-recursive rules. 
+ ErrHaveLeftRecirsion = errors.New("have left recursion") +) + +// PrepareGramma evaluates parameters associated with left recursion +func PrepareGramma(grammar *ast.Grammar) error { + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + ComputeNullables(mapRules) + if err := ComputeLeftRecursives(mapRules); err != nil { + return fmt.Errorf("error compute left recursive: %w", err) + } + rulesWithLeftRecursion := []string{} + for _, rule := range grammar.Rules { + if rule.LeftRecursive { + rulesWithLeftRecursion = append(rulesWithLeftRecursion, rule.Name.Val) + } + } + if len(rulesWithLeftRecursion) > 0 { + return fmt.Errorf("%w: %v", ErrHaveLeftRecirsion, rulesWithLeftRecursion) + } + + return nil +} + +// ComputeNullables evaluates nullable nodes +func ComputeNullables(rules map[string]*ast.Rule) { + // Compute which rules in a grammar are nullable + for _, rule := range rules { + rule.NullableVisit(rules) + } +} + +func findLeader( + graph map[string]map[string]bool, scc map[string]bool, +) (string, error) { + // Try to find a leader such that all cycles go through it. + leaders := make(map[string]bool, len(scc)) + for k := range scc { + leaders[k] = true + } + for start := range scc { + for _, cycle := range FindCyclesInSCC(graph, scc, start) { + mapCycle := map[string]bool{} + for _, k := range cycle { + mapCycle[k] = true + } + for k := range scc { + if _, okCycle := mapCycle[k]; !okCycle { + delete(leaders, k) + } + } + if len(leaders) == 0 { + return "", ErrNoLeader + } + } + } + // Pick an arbitrary leader from the candidates. + var leader string + for k := range leaders { + leader = k // The only element. 
+ break + } + return leader, nil +} + +// ComputeLeftRecursives evaluates left recursion +func ComputeLeftRecursives(rules map[string]*ast.Rule) error { + graph := MakeFirstGraph(rules) + vertices := make([]string, 0, len(graph)) + for k := range graph { + vertices = append(vertices, k) + } + sccs := StronglyConnectedComponents(vertices, graph) + for _, scc := range sccs { + if len(scc) > 1 { + for name := range scc { + rules[name].LeftRecursive = true + } + leader, err := findLeader(graph, scc) + if err != nil { + return fmt.Errorf("error find leader %v: %w", scc, err) + } + rules[leader].Leader = true + } else { + var name string + for k := range scc { + name = k // The only element. + break + } + if _, ok := graph[name][name]; ok { + rules[name].LeftRecursive = true + rules[name].Leader = true + } + } + } + return nil +} + +// MakeFirstGraph compute the graph of left-invocations. +// There's an edge from A to B if A may invoke B at its initial position. +// Note that this requires the nullable flags to have been computed. 
+func MakeFirstGraph(rules map[string]*ast.Rule) map[string]map[string]bool { + graph := make(map[string]map[string]bool) + vertices := make(map[string]bool) + for rulename, rule := range rules { + names := rule.InitialNames() + graph[rulename] = names + for name := range names { + vertices[name] = true + } + } + for vertex := range vertices { + if _, ok := graph[vertex]; !ok { + graph[vertex] = make(map[string]bool) + } + } + return graph +} diff --git a/builder/left_recursion_test.go b/builder/left_recursion_test.go new file mode 100644 index 00000000..64aad87b --- /dev/null +++ b/builder/left_recursion_test.go @@ -0,0 +1,136 @@ +package builder_test + +import ( + "strings" + "testing" + + "github.com/mna/pigeon/ast" + "github.com/mna/pigeon/bootstrap" + "github.com/mna/pigeon/builder" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = expr NEWLINE + expr = ('-' term / expr '+' term / term) + term = NUMBER + foo = NAME+ + bar = NAME* + baz = NAME? + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["expr"].LeftRecursive) + assert.False(t, mapRules["term"].LeftRecursive) + assert.False(t, mapRules["foo"].LeftRecursive) + assert.False(t, mapRules["bar"].LeftRecursive) + assert.False(t, mapRules["baz"].LeftRecursive) +} + +func TestNullable(t *testing.T) { + t.Parallel() + grammar := ` + start = sign NUMBER + sign = ('-' / '+')? 
+ ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.NoError(t, err) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].Nullable) + assert.True(t, mapRules["sign"].Nullable) +} + +func TestAdvancedLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = NUMBER / sign start + sign = '-'? + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].Nullable) + assert.True(t, mapRules["sign"].Nullable) + assert.True(t, mapRules["start"].LeftRecursive) + assert.False(t, mapRules["sign"].LeftRecursive) +} + +func TestMutuallyLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = foo 'E' + foo = bar 'A' / 'B' + bar = foo 'C' / 'D' + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["foo"].LeftRecursive) + assert.True(t, mapRules["bar"].LeftRecursive) +} + +func TestNastyMutuallyLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = target '=' + target = maybe '+' / NAME + maybe = maybe '-' / target + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, 
builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["target"].LeftRecursive) + assert.True(t, mapRules["maybe"].LeftRecursive) +} + +func TestLeftRecursionTooComplex(t *testing.T) { + t.Parallel() + grammar := ` + start = foo + foo = bar '+' / baz '+' / '+' + bar = baz '-' / foo '-' / '-' + baz = foo '*' / bar '*' / '*' + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrNoLeader) +} diff --git a/builder/scc.go b/builder/scc.go new file mode 100644 index 00000000..2de71eae --- /dev/null +++ b/builder/scc.go @@ -0,0 +1,140 @@ +package builder + +import "fmt" + +func min(a1 int, a2 int) int { + if a1 <= a2 { + return a1 + } + return a2 +} + +// StronglyConnectedComponents compute strongly сonnected сomponents of a graph. +// Tarjan's strongly connected components algorithm +func StronglyConnectedComponents( + vertices []string, edges map[string]map[string]bool, +) []map[string]bool { + // Tarjan's strongly connected components algorithm + var ( + identified = map[string]bool{} + stack = []string{} + index = map[string]int{} + lowlink = map[string]int{} + dfs func(v string) []map[string]bool + ) + + dfs = func(vertex string) []map[string]bool { + index[vertex] = len(stack) + stack = append(stack, vertex) + lowlink[vertex] = index[vertex] + + sccs := []map[string]bool{} + for w := range edges[vertex] { + if _, ok := index[w]; !ok { + sccs = append(sccs, dfs(w)...) 
+ lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } else if _, ok := identified[w]; !ok { + lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } + } + + if lowlink[vertex] == index[vertex] { + scc := map[string]bool{} + for _, v := range stack[index[vertex]:] { + scc[v] = true + } + stack = stack[:index[vertex]] + for v := range scc { + identified[v] = true + } + sccs = append(sccs, scc) + } + return sccs + } + + sccs := []map[string]bool{} + for _, v := range vertices { + if _, ok := index[v]; !ok { + sccs = append(sccs, dfs(v)...) + } + } + return sccs +} + +func contains(s []string, e string) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + +func reduceGraph( + graph map[string]map[string]bool, scc map[string]bool, +) map[string]map[string]bool { + reduceGraph := map[string]map[string]bool{} + for src, dsts := range graph { + if _, ok := scc[src]; !ok { + continue + } + reduceGraph[src] = map[string]bool{} + for dst := range dsts { + if _, ok := scc[dst]; !ok { + continue + } + reduceGraph[src][dst] = true + } + } + return reduceGraph +} + +// FindCyclesInSCC find cycles in SCC emanating from start. +// Yields lists of the form ['A', 'B', 'C', 'A'], which means there's +// a path from A -> B -> C -> A. The first item is always the start +// argument, but the last item may be another element, e.g. ['A', +// 'B', 'C', 'B'] means there's a path from A to B and there's a +// cycle from B to C and back. +func FindCyclesInSCC( + graph map[string]map[string]bool, scc map[string]bool, start string, +) [][]string { + // Basic input checks. + if _, ok := scc[start]; !ok { + panic(fmt.Sprintf("scc %v have not %v", scc, start)) + } + extravertices := []string{} + for k := range scc { + if _, ok := graph[k]; !ok { + extravertices = append(extravertices, k) + } + } + if len(extravertices) != 0 { + panic(fmt.Sprintf("graph have not scc. %v", extravertices)) + } + + // Reduce the graph to nodes in the SCC. 
+ graph = reduceGraph(graph, scc) + if _, ok := graph[start]; !ok { + panic(fmt.Sprintf("graph %v have not %v", graph, start)) + } + + // Recursive helper that yields cycles. + var dfs func(node string, path []string) [][]string + dfs = func(node string, path []string) [][]string { + ret := [][]string{} + if contains(path, node) { + t := make([]string, 0, len(path)+1) + t = append(t, path...) + t = append(t, node) + ret = append(ret, t) + return ret + } + path = append(path, node) // TODO: Make this not quadratic. + for child := range graph[node] { + ret = append(ret, dfs(child, path)...) + } + return ret + } + + return dfs(start, []string{}) +} diff --git a/builder/scc_test.go b/builder/scc_test.go new file mode 100644 index 00000000..71161434 --- /dev/null +++ b/builder/scc_test.go @@ -0,0 +1,215 @@ +package builder_test + +import ( + "testing" + + "github.com/mna/pigeon/builder" + "github.com/stretchr/testify/require" +) + +func TestStronglyConnectedComponents(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + sccs []map[string]bool + } + + tests := []struct { + name string + graph map[string]map[string]bool + want want + }{ + { + name: "Simple", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"1": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true, "1": true}, + }}, + }, + { + name: "Without scc", + graph: map[string]map[string]bool{ + "1": {"2": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true}, + {"1": true}, + }}, + }, + { + name: "One element", + graph: map[string]map[string]bool{ + "1": {}, + }, + want: want{sccs: []map[string]bool{ + {"1": true}, + }}, + }, + { + name: "One element with loop", + graph: map[string]map[string]bool{ + "1": {"1": true}, + }, + want: want{sccs: []map[string]bool{ + {"1": true}, + }}, + }, + { + name: "Wiki 1", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": 
true, "7": true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true, "3": true, "1": true}, + {"5": true, "7": true}, + {"4": true, "6": true}, + {"8": true}, + }}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]bool{ + "1": {"2": true, "6": true}, + "2": {"6": true, "4": true}, + "3": {"9": true, "4": true, "8": true}, + "4": {"1": true, "7": true}, + "5": {"9": true, "8": true}, + "6": {"1": true, "4": true, "7": true}, + "7": {"1": true}, + "8": {"5": true, "3": true}, + "9": {"8": true}, + }, + want: want{sccs: []map[string]bool{ + {"1": true, "2": true, "4": true, "6": true, "7": true}, + {"3": true, "5": true, "9": true, "8": true}, + }}, + }, + } + + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + vertices := make([]string, 0, len(testCase.graph)) + for k := range testCase.graph { + vertices = append(vertices, k) + } + require.ElementsMatch(t, builder.StronglyConnectedComponents( + vertices, testCase.graph), testCase.want.sccs) + }) + } +} + +func TestFindCyclesInSCC(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + paths [][]string + } + + tests := []struct { + name string + graph map[string]map[string]bool + scc map[string]bool + start string + want want + }{ + { + name: "Wiki 1 1", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": true, "7": true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + scc: map[string]bool{"2": true, "3": true, "1": true}, + start: "3", + want: want{paths: [][]string{{"3", "1", "2", "3"}}}, + }, + { + name: "Wiki 1 2", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": true, "7": 
true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + scc: map[string]bool{"5": true, "7": true}, + start: "5", + want: want{paths: [][]string{{"5", "7", "5"}}}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]bool{ + "1": {"2": true, "6": true}, + "2": {"6": true, "4": true}, + "3": {"9": true, "4": true, "8": true}, + "4": {"1": true, "7": true}, + "5": {"9": true, "8": true}, + "6": {"1": true, "4": true, "7": true}, + "7": {"1": true}, + "8": {"5": true, "3": true}, + "9": {"8": true}, + }, + scc: map[string]bool{ + "1": true, "2": true, "4": true, "6": true, "7": true, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "6", "1"}, + {"1", "2", "6", "4", "1"}, + {"1", "2", "6", "4", "7", "1"}, + {"1", "2", "6", "7", "1"}, + {"1", "2", "4", "1"}, + {"1", "2", "4", "7", "1"}, + {"1", "6", "1"}, + {"1", "6", "7", "1"}, + {"1", "6", "4", "7", "1"}, + {"1", "6", "4", "1"}, + }}, + }, + { + name: "loop in loop", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true, "2": true}, + }, + scc: map[string]bool{ + "1": true, "2": true, "3": true, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "3", "1"}, + {"1", "2", "3", "2"}, + }}, + }, + } + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + require.ElementsMatch(t, builder.FindCyclesInSCC( + testCase.graph, testCase.scc, testCase.start), + testCase.want.paths) + }) + } +} diff --git a/go.mod b/go.mod index da6d04ee..8cbf64a9 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,12 @@ go 1.19 require golang.org/x/tools v0.9.3 require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect + github.com/stretchr/testify v1.8.4 // indirect + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect golang.org/x/mod v0.10.0 // indirect 
golang.org/x/sys v0.8.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index c3488613..96381580 100644 --- a/go.sum +++ b/go.sum @@ -1,10 +1,40 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= golang.org/x/mod v0.10.0/go.mod 
h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y= golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index e1d3423c..8f13bf77 100644 --- a/main.go +++ b/main.go @@ -48,6 +48,7 @@ func main() { 
outputFlag = fs.String("o", "", "output file, defaults to stdout") optimizeBasicLatinFlag = fs.Bool("optimize-basic-latin", false, "generate optimized parser for Unicode Basic Latin character sets") optimizeGrammar = fs.Bool("optimize-grammar", false, "optimize the given grammar (EXPERIMENTAL FEATURE)") + ignoreLeftRecursion = fs.Bool("ignore-left-recursion", false, "ignore errors related to left recursion") optimizeParserFlag = fs.Bool("optimize-parser", false, "generate optimized parser without Debug and Memoize options") recvrNmFlag = fs.String("receiver-name", "c", "receiver name for the generated methods") noBuildFlag = fs.Bool("x", false, "do not build, only parse") @@ -136,7 +137,10 @@ func main() { optimizeParser := builder.Optimize(*optimizeParserFlag) basicLatinOptimize := builder.BasicLatinLookupTable(*optimizeBasicLatinFlag) nolintOpt := builder.Nolint(*nolint) - if err := builder.BuildParser(outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, nolintOpt); err != nil { + leftRecursionIgnorer := builder.IgnoreLeftRecursion(*ignoreLeftRecursion) + if err := builder.BuildParser( + outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, + nolintOpt, leftRecursionIgnorer); err != nil { fmt.Fprintln(os.Stderr, "build error: ", err) exit(5) } From 681b7be5eed50e48673617b1819bdd0f4b05b793 Mon Sep 17 00:00:00 2001 From: "k.molodyakov" Date: Tue, 13 Jun 2023 11:47:27 +0300 Subject: [PATCH 2/2] Add left recursion check --- Makefile | 4 +- ast/ast.go | 468 +++++++++++++++++++++++++++++++++ builder/builder.go | 26 +- builder/left_recursion.go | 134 ++++++++++ builder/left_recursion_test.go | 136 ++++++++++ builder/scc.go | 140 ++++++++++ builder/scc_test.go | 215 +++++++++++++++ go.mod | 6 + go.sum | 27 ++ main.go | 6 +- 10 files changed, 1154 insertions(+), 8 deletions(-) create mode 100644 builder/left_recursion.go create mode 100644 builder/left_recursion_test.go create mode 100644 builder/scc.go create mode 100644 builder/scc_test.go diff --git 
a/Makefile b/Makefile index ddf0f506..0b33214b 100644 --- a/Makefile +++ b/Makefile @@ -116,7 +116,7 @@ $(TEST_DIR)/goto_state/goto_state.go: $(TEST_DIR)/goto_state/goto_state.peg $(BI $(BINDIR)/pigeon -nolint $< > $@ $(TEST_DIR)/max_expr_cnt/maxexpr.go: $(TEST_DIR)/max_expr_cnt/maxexpr.peg $(BINDIR)/pigeon - $(BINDIR)/pigeon -nolint $< > $@ + $(BINDIR)/pigeon -nolint -ignore-left-recursion $< > $@ $(TEST_DIR)/labeled_failures/labeled_failures.go: $(TEST_DIR)/labeled_failures/labeled_failures.peg $(BINDIR)/pigeon $(BINDIR)/pigeon -nolint $< > $@ @@ -167,7 +167,7 @@ $(TEST_DIR)/issue_70/optimized-grammar/issue_70.go: $(TEST_DIR)/issue_70/issue_7 $(BINDIR)/pigeon -nolint -optimize-grammar $< > $@ $(TEST_DIR)/issue_70b/issue_70b.go: $(TEST_DIR)/issue_70b/issue_70b.peg $(BINDIR)/pigeon - $(BINDIR)/pigeon -nolint --optimize-grammar $< > $@ + $(BINDIR)/pigeon -nolint --optimize-grammar -ignore-left-recursion $< > $@ $(TEST_DIR)/issue_80/issue_80.go: $(TEST_DIR)/issue_80/issue_80.peg $(BINDIR)/pigeon $(BINDIR)/pigeon -nolint $< > $@ diff --git a/ast/ast.go b/ast/ast.go index c34b7d17..6302fcc0 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -35,6 +35,8 @@ type Grammar struct { Rules []*Rule } +var _ Expression = (*Grammar)(nil) + // NewGrammar creates a new grammar at the specified position. 
func NewGrammar(p Pos) *Grammar { return &Grammar{p: p} @@ -56,6 +58,21 @@ func (g *Grammar) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (g *Grammar) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Grammar") +} + +// IsNullable returns the nullable attribute of the node +func (g *Grammar) IsNullable() bool { + panic("IsNullable should not be called on the Grammar") +} + +// InitialNames returns names of nodes with which an expression can begin +func (g *Grammar) InitialNames() map[string]bool { + panic("InitialNames should not be called on the Grammar") +} + // Rule represents a rule in the PEG grammar. It has a name, an optional // display name to be used in error messages, and an expression. type Rule struct { @@ -63,8 +80,16 @@ type Rule struct { Name *Identifier DisplayName *StringLit Expr Expression + + // for work with left recursion + Visited bool + Nullable bool + LeftRecursive bool + Leader bool } +var _ Expression = (*Rule)(nil) + // NewRule creates a rule with at the specified position and with the // specified name as identifier. func NewRule(p Pos, name *Identifier) *Rule { @@ -80,9 +105,35 @@ func (r *Rule) String() string { r.p, r, r.Name, r.DisplayName, r.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (r *Rule) NullableVisit(rules map[string]*Rule) bool { + if r.Visited { + // A left-recursive rule is considered non-nullable. + return false + } + r.Visited = true + r.Nullable = r.Expr.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *Rule) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *Rule) InitialNames() map[string]bool { + return r.Expr.InitialNames() +} + // Expression is the interface implemented by all expression types. 
type Expression interface { Pos() Pos + + // for work with left recursion + NullableVisit(rules map[string]*Rule) bool + IsNullable() bool + InitialNames() map[string]bool } // ChoiceExpr is an ordered sequence of expressions. The parser tries to @@ -91,8 +142,12 @@ type Expression interface { type ChoiceExpr struct { p Pos Alternatives []Expression + + Nullable bool } +var _ Expression = (*ChoiceExpr)(nil) + // NewChoiceExpr creates a choice expression at the specified position. func NewChoiceExpr(p Pos) *ChoiceExpr { return &ChoiceExpr{p: p} @@ -113,6 +168,34 @@ func (c *ChoiceExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (c *ChoiceExpr) NullableVisit(rules map[string]*Rule) bool { + for _, alt := range c.Alternatives { + if alt.NullableVisit(rules) { + c.Nullable = true + return true + } + } + c.Nullable = false + return false +} + +// IsNullable returns the nullable attribute of the node +func (c *ChoiceExpr) IsNullable() bool { + return c.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *ChoiceExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for _, alt := range c.Alternatives { + for name := range alt.InitialNames() { + names[name] = true + } + } + return names +} + // FailureLabel is an identifier, which can by thrown and recovered in a grammar type FailureLabel string @@ -124,8 +207,12 @@ type RecoveryExpr struct { Expr Expression RecoverExpr Expression Labels []FailureLabel + + Nullable bool } +var _ Expression = (*RecoveryExpr)(nil) + // NewRecoveryExpr creates a choice expression at the specified position. 
func NewRecoveryExpr(p Pos) *RecoveryExpr { return &RecoveryExpr{p: p} @@ -147,6 +234,29 @@ func (r *RecoveryExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (r *RecoveryExpr) NullableVisit(rules map[string]*Rule) bool { + r.Nullable = r.Expr.NullableVisit(rules) || r.RecoverExpr.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *RecoveryExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *RecoveryExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for name := range r.Expr.InitialNames() { + names[name] = true + } + for name := range r.RecoverExpr.InitialNames() { + names[name] = true + } + return names +} + // ActionExpr is an expression that has an associated block of code to // execute when the expression matches. type ActionExpr struct { @@ -154,8 +264,12 @@ type ActionExpr struct { Expr Expression Code *CodeBlock FuncIx int + + Nullable bool } +var _ Expression = (*ActionExpr)(nil) + // NewActionExpr creates a new action expression at the specified position. 
func NewActionExpr(p Pos) *ActionExpr { return &ActionExpr{p: p} @@ -169,6 +283,26 @@ func (a *ActionExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v, Code: %v}", a.p, a, a.Expr, a.Code) } +// NullableVisit recursively determines whether an object is nullable +func (a *ActionExpr) NullableVisit(rules map[string]*Rule) bool { + a.Nullable = a.Expr.NullableVisit(rules) + return a.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (a *ActionExpr) IsNullable() bool { + return a.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *ActionExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for name := range a.Expr.InitialNames() { + names[name] = true + } + return names +} + // ThrowExpr is an expression that throws an FailureLabel to be catched by a // RecoveryChoiceExpr. type ThrowExpr struct { @@ -176,6 +310,8 @@ type ThrowExpr struct { Label string } +var _ Expression = (*ThrowExpr)(nil) + // NewThrowExpr creates a new throw expression at the specified position. func NewThrowExpr(p Pos) *ThrowExpr { return &ThrowExpr{p: p} @@ -189,13 +325,32 @@ func (t *ThrowExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v}", t.p, t, t.Label) } +// NullableVisit recursively determines whether an object is nullable +func (t *ThrowExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (t *ThrowExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (t *ThrowExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // SeqExpr is an ordered sequence of expressions, all of which must match // if the SeqExpr is to be a match itself. type SeqExpr struct { p Pos Exprs []Expression + + Nullable bool } +var _ Expression = (*SeqExpr)(nil) + // NewSeqExpr creates a new sequence expression at the specified position. 
func NewSeqExpr(p Pos) *SeqExpr { return &SeqExpr{p: p} @@ -216,6 +371,37 @@ func (s *SeqExpr) String() string { return buf.String() } +// NullableVisit recursively determines whether an object is nullable +func (s *SeqExpr) NullableVisit(rules map[string]*Rule) bool { + for _, item := range s.Exprs { + if !item.NullableVisit(rules) { + s.Nullable = false + return false + } + } + s.Nullable = true + return true +} + +// IsNullable returns the nullable attribute of the node +func (s *SeqExpr) IsNullable() bool { + return s.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *SeqExpr) InitialNames() map[string]bool { + names := make(map[string]bool) + for _, item := range s.Exprs { + for name := range item.InitialNames() { + names[name] = true + } + if !item.IsNullable() { + break + } + } + return names +} + // LabeledExpr is an expression that has an associated label. Code blocks // can access the value of the expression using that label, that becomes // a local variable in the code. @@ -225,6 +411,8 @@ type LabeledExpr struct { Expr Expression } +var _ Expression = (*LabeledExpr)(nil) + // NewLabeledExpr creates a new labeled expression at the specified position. 
func NewLabeledExpr(p Pos) *LabeledExpr { return &LabeledExpr{p: p} @@ -238,6 +426,21 @@ func (l *LabeledExpr) String() string { return fmt.Sprintf("%s: %T{Label: %v, Expr: %v}", l.p, l, l.Label, l.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (l *LabeledExpr) NullableVisit(rules map[string]*Rule) bool { + return l.Expr.NullableVisit(rules) +} + +// IsNullable returns the nullable attribute of the node +func (l *LabeledExpr) IsNullable() bool { + return l.Expr.IsNullable() +} + +// InitialNames returns names of nodes with which an expression can begin +func (l *LabeledExpr) InitialNames() map[string]bool { + return l.Expr.InitialNames() +} + // AndExpr is a zero-length matcher that is considered a match if the // expression it contains is a match. type AndExpr struct { @@ -250,6 +453,8 @@ func NewAndExpr(p Pos) *AndExpr { return &AndExpr{p: p} } +var _ Expression = (*AndExpr)(nil) + // Pos returns the starting position of the node. func (a *AndExpr) Pos() Pos { return a.p } @@ -258,6 +463,21 @@ func (a *AndExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", a.p, a, a.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (a *AndExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (a *AndExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AndExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // NotExpr is a zero-length matcher that is considered a match if the // expression it contains is not a match. type NotExpr struct { @@ -265,6 +485,8 @@ type NotExpr struct { Expr Expression } +var _ Expression = (*NotExpr)(nil) + // NewNotExpr creates a new not (!) expression at the specified position. 
func NewNotExpr(p Pos) *NotExpr { return &NotExpr{p: p} @@ -278,12 +500,29 @@ func (n *NotExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", n.p, n, n.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (n *NotExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (n *NotExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (n *NotExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // ZeroOrOneExpr is an expression that can be matched zero or one time. type ZeroOrOneExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrOneExpr)(nil) + // NewZeroOrOneExpr creates a new zero or one expression at the specified // position. func NewZeroOrOneExpr(p Pos) *ZeroOrOneExpr { @@ -298,12 +537,29 @@ func (z *ZeroOrOneExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (z *ZeroOrOneExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (z *ZeroOrOneExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (z *ZeroOrOneExpr) InitialNames() map[string]bool { + return z.Expr.InitialNames() +} + // ZeroOrMoreExpr is an expression that can be matched zero or more times. type ZeroOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*ZeroOrMoreExpr)(nil) + // NewZeroOrMoreExpr creates a new zero or more expression at the specified // position. 
func NewZeroOrMoreExpr(p Pos) *ZeroOrMoreExpr { @@ -318,12 +574,29 @@ func (z *ZeroOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", z.p, z, z.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (z *ZeroOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (z *ZeroOrMoreExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (z *ZeroOrMoreExpr) InitialNames() map[string]bool { + return z.Expr.InitialNames() +} + // OneOrMoreExpr is an expression that can be matched one or more times. type OneOrMoreExpr struct { p Pos Expr Expression } +var _ Expression = (*OneOrMoreExpr)(nil) + // NewOneOrMoreExpr creates a new one or more expression at the specified // position. func NewOneOrMoreExpr(p Pos) *OneOrMoreExpr { @@ -338,12 +611,31 @@ func (o *OneOrMoreExpr) String() string { return fmt.Sprintf("%s: %T{Expr: %v}", o.p, o, o.Expr) } +// NullableVisit recursively determines whether an object is nullable +func (o *OneOrMoreExpr) NullableVisit(rules map[string]*Rule) bool { + return false +} + +// IsNullable returns the nullable attribute of the node +func (o *OneOrMoreExpr) IsNullable() bool { + return false +} + +// InitialNames returns names of nodes with which an expression can begin +func (o *OneOrMoreExpr) InitialNames() map[string]bool { + return o.Expr.InitialNames() +} + // RuleRefExpr is an expression that references a rule by name. type RuleRefExpr struct { p Pos Name *Identifier + + Nullable bool } +var _ Expression = (*RuleRefExpr)(nil) + // NewRuleRefExpr creates a new rule reference expression at the specified // position. 
func NewRuleRefExpr(p Pos) *RuleRefExpr { @@ -358,6 +650,28 @@ func (r *RuleRefExpr) String() string { return fmt.Sprintf("%s: %T{Name: %v}", r.p, r, r.Name) } +// NullableVisit recursively determines whether an object is nullable +func (r *RuleRefExpr) NullableVisit(rules map[string]*Rule) bool { + item, ok := rules[r.Name.Val] + if !ok { + // Token or unknown; never empty. + r.Nullable = false + return false + } + r.Nullable = item.NullableVisit(rules) + return r.Nullable +} + +// IsNullable returns the nullable attribute of the node +func (r *RuleRefExpr) IsNullable() bool { + return r.Nullable +} + +// InitialNames returns names of nodes with which an expression can begin +func (r *RuleRefExpr) InitialNames() map[string]bool { + return map[string]bool{r.Name.Val: true} +} + // StateCodeExpr is an expression which can modify the internal state of the parser. type StateCodeExpr struct { p Pos @@ -365,6 +679,8 @@ type StateCodeExpr struct { FuncIx int } +var _ Expression = (*StateCodeExpr)(nil) + // NewStateCodeExpr creates a new state (#) code expression at the specified // position. func NewStateCodeExpr(p Pos) *StateCodeExpr { @@ -379,6 +695,21 @@ func (s *StateCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", s.p, s, s.Code) } +// NullableVisit recursively determines whether an object is nullable +func (s *StateCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (s *StateCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *StateCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // AndCodeExpr is a zero-length matcher that is considered a match if the // code block returns true. 
type AndCodeExpr struct { @@ -387,6 +718,8 @@ type AndCodeExpr struct { FuncIx int } +var _ Expression = (*AndCodeExpr)(nil) + // NewAndCodeExpr creates a new and (&) code expression at the specified // position. func NewAndCodeExpr(p Pos) *AndCodeExpr { @@ -401,6 +734,21 @@ func (a *AndCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", a.p, a, a.Code) } +// NullableVisit recursively determines whether an object is nullable +func (a *AndCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (a *AndCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AndCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // NotCodeExpr is a zero-length matcher that is considered a match if the // code block returns false. type NotCodeExpr struct { @@ -409,6 +757,8 @@ type NotCodeExpr struct { FuncIx int } +var _ Expression = (*NotCodeExpr)(nil) + // NewNotCodeExpr creates a new not (!) code expression at the specified // position. func NewNotCodeExpr(p Pos) *NotCodeExpr { @@ -423,6 +773,21 @@ func (n *NotCodeExpr) String() string { return fmt.Sprintf("%s: %T{Code: %v}", n.p, n, n.Code) } +// NullableVisit recursively determines whether an object is nullable +func (n *NotCodeExpr) NullableVisit(rules map[string]*Rule) bool { + return true +} + +// IsNullable returns the nullable attribute of the node +func (n *NotCodeExpr) IsNullable() bool { + return true +} + +// InitialNames returns names of nodes with which an expression can begin +func (n *NotCodeExpr) InitialNames() map[string]bool { + return make(map[string]bool) +} + // LitMatcher is a string literal matcher. The value to match may be a // double-quoted string, a single-quoted single character, or a back-tick // quoted raw string. 
@@ -431,6 +796,8 @@ type LitMatcher struct { IgnoreCase bool } +var _ Expression = (*LitMatcher)(nil) + // NewLitMatcher creates a new literal matcher at the specified position and // with the specified value. func NewLitMatcher(p Pos, v string) *LitMatcher { @@ -445,6 +812,22 @@ func (l *LitMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q, IgnoreCase: %t}", l.p, l, l.Val, l.IgnoreCase) } +// NullableVisit recursively determines whether an object is nullable +func (l *LitMatcher) NullableVisit(rules map[string]*Rule) bool { + return l.IsNullable() +} + +// IsNullable returns the nullable attribute of the node +func (l *LitMatcher) IsNullable() bool { + // The string token '' is considered empty. + return len(l.Val) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin +func (l *LitMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // CharClassMatcher is a character class matcher. The value to match must // be one of the specified characters, in a range of characters, or in the // Unicode classes of characters. @@ -457,6 +840,8 @@ type CharClassMatcher struct { UnicodeClasses []string } +var _ Expression = (*CharClassMatcher)(nil) + // NewCharClassMatcher creates a new character class matcher at the specified // position and with the specified raw value. It parses the raw value into // the list of characters, ranges and Unicode classes. 
@@ -580,11 +965,28 @@ func (c *CharClassMatcher) String() string { c.p, c, c.Val, c.IgnoreCase, c.Inverted) } +// NullableVisit recursively determines whether an object is nullable +func (c *CharClassMatcher) NullableVisit(rules map[string]*Rule) bool { + return c.IsNullable() +} + +// IsNullable returns the nullable attribute of the node +func (c *CharClassMatcher) IsNullable() bool { + return len(c.Chars) == 0 && len(c.Ranges) == 0 && len(c.UnicodeClasses) == 0 +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *CharClassMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // AnyMatcher is a matcher that matches any character except end-of-file. type AnyMatcher struct { posValue } +var _ Expression = (*AnyMatcher)(nil) + // NewAnyMatcher creates a new any matcher at the specified position. The // value is provided for completeness' sake, but it is always the dot. func NewAnyMatcher(p Pos, v string) *AnyMatcher { @@ -599,11 +1001,28 @@ func (a *AnyMatcher) String() string { return fmt.Sprintf("%s: %T{Val: %q}", a.p, a, a.Val) } +// NullableVisit recursively determines whether an object is nullable +func (a *AnyMatcher) NullableVisit(rules map[string]*Rule) bool { + return false +} + +// IsNullable returns the nullable attribute of the node +func (a *AnyMatcher) IsNullable() bool { + return false +} + +// InitialNames returns names of nodes with which an expression can begin +func (a *AnyMatcher) InitialNames() map[string]bool { + return make(map[string]bool) +} + // CodeBlock represents a code block. type CodeBlock struct { posValue } +var _ Expression = (*CodeBlock)(nil) + // NewCodeBlock creates a new code block at the specified position and with // the specified value. The value includes the outer braces. 
func NewCodeBlock(p Pos, code string) *CodeBlock { @@ -618,11 +1037,28 @@ func (c *CodeBlock) String() string { return fmt.Sprintf("%s: %T{Val: %q}", c.p, c, c.Val) } +// NullableVisit recursively determines whether an object is nullable +func (c *CodeBlock) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the CodeBlock") +} + +// IsNullable returns the nullable attribute of the node +func (c *CodeBlock) IsNullable() bool { + panic("IsNullable should not be called on the CodeBlock") +} + +// InitialNames returns names of nodes with which an expression can begin +func (c *CodeBlock) InitialNames() map[string]bool { + panic("InitialNames should not be called on the CodeBlock") +} + // Identifier represents an identifier. type Identifier struct { posValue } +var _ Expression = (*Identifier)(nil) + // NewIdentifier creates a new identifier at the specified position and // with the specified name. func NewIdentifier(p Pos, name string) *Identifier { @@ -637,11 +1073,28 @@ func (i *Identifier) String() string { return fmt.Sprintf("%s: %T{Val: %q}", i.p, i, i.Val) } +// NullableVisit recursively determines whether an object is nullable +func (i *Identifier) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the Identifier") +} + +// IsNullable returns the nullable attribute of the node +func (i *Identifier) IsNullable() bool { + panic("IsNullable should not be called on the Identifier") +} + +// InitialNames returns names of nodes with which an expression can begin +func (i *Identifier) InitialNames() map[string]bool { + panic("InitialNames should not be called on the Identifier") +} + // StringLit represents a string literal. type StringLit struct { posValue } +var _ Expression = (*StringLit)(nil) + // NewStringLit creates a new string literal at the specified position and // with the specified value. 
func NewStringLit(p Pos, val string) *StringLit { @@ -656,6 +1109,21 @@ func (s *StringLit) String() string { return fmt.Sprintf("%s: %T{Val: %q}", s.p, s, s.Val) } +// NullableVisit recursively determines whether an object is nullable +func (s *StringLit) NullableVisit(rules map[string]*Rule) bool { + panic("NullableVisit should not be called on the StringLit") +} + +// IsNullable returns the nullable attribute of the node +func (s *StringLit) IsNullable() bool { + panic("IsNullable should not be called on the StringLit") +} + +// InitialNames returns names of nodes with which an expression can begin +func (s *StringLit) InitialNames() map[string]bool { + panic("InitialNames should not be called on the StringLit") +} + type posValue struct { p Pos Val string diff --git a/builder/builder.go b/builder/builder.go index ee76666a..9d067327 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -77,6 +77,17 @@ func Optimize(optimize bool) Option { } } +// IgnoreLeftRecursion returns an option that specifies the ignoreLeftRecursion option +// If ignoreLeftRecursion is true, errors associated +// with the use of left recursion rules are ignored. +func IgnoreLeftRecursion(ignore bool) Option { + return func(b *builder) Option { + prev := b.ignoreLeftRecursion + b.ignoreLeftRecursion = ignore + return IgnoreLeftRecursion(prev) + } +} + // Nolint returns an option that specifies the nolint option // If nolint is true, special '// nolint: ...' comments are added // to the generated parser to suppress warnings by gometalinter.
@@ -118,6 +129,7 @@ type builder struct { basicLatinLookupTable bool globalState bool nolint bool + ignoreLeftRecursion bool ruleName string exprIndex int @@ -132,11 +144,15 @@ func (b *builder) setOptions(opts []Option) { } } -func (b *builder) buildParser(g *ast.Grammar) error { - b.writeInit(g.Init) - b.writeGrammar(g) - - for _, rule := range g.Rules { +func (b *builder) buildParser(grammar *ast.Grammar) error { + if err := PrepareGramma(grammar); err != nil { + if !b.ignoreLeftRecursion { + return fmt.Errorf("uncorrect gramma: %w", err) + } + } + b.writeInit(grammar.Init) + b.writeGrammar(grammar) + for _, rule := range grammar.Rules { b.writeRuleCode(rule) } b.writeStaticCode() diff --git a/builder/left_recursion.go b/builder/left_recursion.go new file mode 100644 index 00000000..a93c8b26 --- /dev/null +++ b/builder/left_recursion.go @@ -0,0 +1,134 @@ +package builder + +import ( + "errors" + "fmt" + + "github.com/mna/pigeon/ast" +) + +var ( + // ErrNoLeader is no leader error. + ErrNoLeader = errors.New( + "SCC has no leadership candidate (no element is included in all cycles)") + // ErrHaveLeftRecirsion is recursion error. 
+ ErrHaveLeftRecirsion = errors.New("have left recursion") +) + +// PrepareGramma evaluates parameters associated with left recursion +func PrepareGramma(grammar *ast.Grammar) error { + mapRules := make(map[string]*ast.Rule, len(grammar.Rules)) + for _, rule := range grammar.Rules { + mapRules[rule.Name.Val] = rule + } + ComputeNullables(mapRules) + if err := ComputeLeftRecursives(mapRules); err != nil { + return fmt.Errorf("error compute left recursive: %w", err) + } + rulesWithLeftRecursion := []string{} + for _, rule := range grammar.Rules { + if rule.LeftRecursive { + rulesWithLeftRecursion = append(rulesWithLeftRecursion, rule.Name.Val) + } + } + if len(rulesWithLeftRecursion) > 0 { + return fmt.Errorf("%w: %v", ErrHaveLeftRecirsion, rulesWithLeftRecursion) + } + + return nil +} + +// ComputeNullables evaluates nullable nodes +func ComputeNullables(rules map[string]*ast.Rule) { + // Compute which rules in a grammar are nullable + for _, rule := range rules { + rule.NullableVisit(rules) + } +} + +func findLeader( + graph map[string]map[string]bool, scc map[string]bool, +) (string, error) { + // Try to find a leader such that all cycles go through it. + leaders := make(map[string]bool, len(scc)) + for k := range scc { + leaders[k] = true + } + for start := range scc { + for _, cycle := range FindCyclesInSCC(graph, scc, start) { + mapCycle := map[string]bool{} + for _, k := range cycle { + mapCycle[k] = true + } + for k := range scc { + if _, okCycle := mapCycle[k]; !okCycle { + delete(leaders, k) + } + } + if len(leaders) == 0 { + return "", ErrNoLeader + } + } + } + // Pick an arbitrary leader from the candidates. + var leader string + for k := range leaders { + leader = k // The only element. 
+ break + } + return leader, nil +} + +// ComputeLeftRecursives evaluates left recursion +func ComputeLeftRecursives(rules map[string]*ast.Rule) error { + graph := MakeFirstGraph(rules) + vertices := make([]string, 0, len(graph)) + for k := range graph { + vertices = append(vertices, k) + } + sccs := StronglyConnectedComponents(vertices, graph) + for _, scc := range sccs { + if len(scc) > 1 { + for name := range scc { + rules[name].LeftRecursive = true + } + leader, err := findLeader(graph, scc) + if err != nil { + return fmt.Errorf("error find leader %v: %w", scc, err) + } + rules[leader].Leader = true + } else { + var name string + for k := range scc { + name = k // The only element. + break + } + if _, ok := graph[name][name]; ok { + rules[name].LeftRecursive = true + rules[name].Leader = true + } + } + } + return nil +} + +// MakeFirstGraph compute the graph of left-invocations. +// There's an edge from A to B if A may invoke B at its initial position. +// Note that this requires the nullable flags to have been computed. 
+func MakeFirstGraph(rules map[string]*ast.Rule) map[string]map[string]bool { + graph := make(map[string]map[string]bool) + vertices := make(map[string]bool) + for rulename, rule := range rules { + names := rule.InitialNames() + graph[rulename] = names + for name := range names { + vertices[name] = true + } + } + for vertex := range vertices { + if _, ok := graph[vertex]; !ok { + graph[vertex] = make(map[string]bool) + } + } + return graph +} diff --git a/builder/left_recursion_test.go b/builder/left_recursion_test.go new file mode 100644 index 00000000..64aad87b --- /dev/null +++ b/builder/left_recursion_test.go @@ -0,0 +1,136 @@ +package builder_test + +import ( + "strings" + "testing" + + "github.com/mna/pigeon/ast" + "github.com/mna/pigeon/bootstrap" + "github.com/mna/pigeon/builder" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = expr NEWLINE + expr = ('-' term / expr '+' term / term) + term = NUMBER + foo = NAME+ + bar = NAME* + baz = NAME? + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["expr"].LeftRecursive) + assert.False(t, mapRules["term"].LeftRecursive) + assert.False(t, mapRules["foo"].LeftRecursive) + assert.False(t, mapRules["bar"].LeftRecursive) + assert.False(t, mapRules["baz"].LeftRecursive) +} + +func TestNullable(t *testing.T) { + t.Parallel() + grammar := ` + start = sign NUMBER + sign = ('-' / '+')? 
+ ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.NoError(t, err) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].Nullable) + assert.True(t, mapRules["sign"].Nullable) +} + +func TestAdvancedLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = NUMBER / sign start + sign = '-'? + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].Nullable) + assert.True(t, mapRules["sign"].Nullable) + assert.True(t, mapRules["start"].LeftRecursive) + assert.False(t, mapRules["sign"].LeftRecursive) +} + +func TestMutuallyLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = foo 'E' + foo = bar 'A' / 'B' + bar = foo 'C' / 'D' + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["foo"].LeftRecursive) + assert.True(t, mapRules["bar"].LeftRecursive) +} + +func TestNastyMutuallyLeftRrecursive(t *testing.T) { + t.Parallel() + grammar := ` + start = target '=' + target = maybe '+' / NAME + maybe = maybe '-' / target + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, 
builder.ErrHaveLeftRecirsion) + mapRules := make(map[string]*ast.Rule, len(g.Rules)) + for _, rule := range g.Rules { + mapRules[rule.Name.Val] = rule + } + assert.False(t, mapRules["start"].LeftRecursive) + assert.True(t, mapRules["target"].LeftRecursive) + assert.True(t, mapRules["maybe"].LeftRecursive) +} + +func TestLeftRecursionTooComplex(t *testing.T) { + t.Parallel() + grammar := ` + start = foo + foo = bar '+' / baz '+' / '+' + bar = baz '-' / foo '-' / '-' + baz = foo '*' / bar '*' / '*' + ` + p := bootstrap.NewParser() + g, err := p.Parse("", strings.NewReader(grammar)) + require.NoError(t, err) + err = builder.PrepareGramma(g) + require.ErrorIs(t, err, builder.ErrNoLeader) +} diff --git a/builder/scc.go b/builder/scc.go new file mode 100644 index 00000000..2de71eae --- /dev/null +++ b/builder/scc.go @@ -0,0 +1,140 @@ +package builder + +import "fmt" + +func min(a1 int, a2 int) int { + if a1 <= a2 { + return a1 + } + return a2 +} + +// StronglyConnectedComponents computes strongly connected components of a graph. +// Tarjan's strongly connected components algorithm +func StronglyConnectedComponents( + vertices []string, edges map[string]map[string]bool, +) []map[string]bool { + // Tarjan's strongly connected components algorithm + var ( + identified = map[string]bool{} + stack = []string{} + index = map[string]int{} + lowlink = map[string]int{} + dfs func(v string) []map[string]bool + ) + + dfs = func(vertex string) []map[string]bool { + index[vertex] = len(stack) + stack = append(stack, vertex) + lowlink[vertex] = index[vertex] + + sccs := []map[string]bool{} + for w := range edges[vertex] { + if _, ok := index[w]; !ok { + sccs = append(sccs, dfs(w)...)
+ lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } else if _, ok := identified[w]; !ok { + lowlink[vertex] = min(lowlink[vertex], lowlink[w]) + } + } + + if lowlink[vertex] == index[vertex] { + scc := map[string]bool{} + for _, v := range stack[index[vertex]:] { + scc[v] = true + } + stack = stack[:index[vertex]] + for v := range scc { + identified[v] = true + } + sccs = append(sccs, scc) + } + return sccs + } + + sccs := []map[string]bool{} + for _, v := range vertices { + if _, ok := index[v]; !ok { + sccs = append(sccs, dfs(v)...) + } + } + return sccs +} + +func contains(s []string, e string) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + +func reduceGraph( + graph map[string]map[string]bool, scc map[string]bool, +) map[string]map[string]bool { + reduceGraph := map[string]map[string]bool{} + for src, dsts := range graph { + if _, ok := scc[src]; !ok { + continue + } + reduceGraph[src] = map[string]bool{} + for dst := range dsts { + if _, ok := scc[dst]; !ok { + continue + } + reduceGraph[src][dst] = true + } + } + return reduceGraph +} + +// FindCyclesInSCC find cycles in SCC emanating from start. +// Yields lists of the form ['A', 'B', 'C', 'A'], which means there's +// a path from A -> B -> C -> A. The first item is always the start +// argument, but the last item may be another element, e.g. ['A', +// 'B', 'C', 'B'] means there's a path from A to B and there's a +// cycle from B to C and back. +func FindCyclesInSCC( + graph map[string]map[string]bool, scc map[string]bool, start string, +) [][]string { + // Basic input checks. + if _, ok := scc[start]; !ok { + panic(fmt.Sprintf("scc %v have not %v", scc, start)) + } + extravertices := []string{} + for k := range scc { + if _, ok := graph[k]; !ok { + extravertices = append(extravertices, k) + } + } + if len(extravertices) != 0 { + panic(fmt.Sprintf("graph have not scc. %v", extravertices)) + } + + // Reduce the graph to nodes in the SCC. 
+ graph = reduceGraph(graph, scc) + if _, ok := graph[start]; !ok { + panic(fmt.Sprintf("graph %v have not %v", graph, start)) + } + + // Recursive helper that yields cycles. + var dfs func(node string, path []string) [][]string + dfs = func(node string, path []string) [][]string { + ret := [][]string{} + if contains(path, node) { + t := make([]string, 0, len(path)+1) + t = append(t, path...) + t = append(t, node) + ret = append(ret, t) + return ret + } + path = append(path, node) // TODO: Make this not quadratic. + for child := range graph[node] { + ret = append(ret, dfs(child, path)...) + } + return ret + } + + return dfs(start, []string{}) +} diff --git a/builder/scc_test.go b/builder/scc_test.go new file mode 100644 index 00000000..71161434 --- /dev/null +++ b/builder/scc_test.go @@ -0,0 +1,215 @@ +package builder_test + +import ( + "testing" + + "github.com/mna/pigeon/builder" + "github.com/stretchr/testify/require" +) + +func TestStronglyConnectedComponents(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + sccs []map[string]bool + } + + tests := []struct { + name string + graph map[string]map[string]bool + want want + }{ + { + name: "Simple", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"1": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true, "1": true}, + }}, + }, + { + name: "Without scc", + graph: map[string]map[string]bool{ + "1": {"2": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true}, + {"1": true}, + }}, + }, + { + name: "One element", + graph: map[string]map[string]bool{ + "1": {}, + }, + want: want{sccs: []map[string]bool{ + {"1": true}, + }}, + }, + { + name: "One element with loop", + graph: map[string]map[string]bool{ + "1": {"1": true}, + }, + want: want{sccs: []map[string]bool{ + {"1": true}, + }}, + }, + { + name: "Wiki 1", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": 
true, "7": true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + want: want{sccs: []map[string]bool{ + {"2": true, "3": true, "1": true}, + {"5": true, "7": true}, + {"4": true, "6": true}, + {"8": true}, + }}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]bool{ + "1": {"2": true, "6": true}, + "2": {"6": true, "4": true}, + "3": {"9": true, "4": true, "8": true}, + "4": {"1": true, "7": true}, + "5": {"9": true, "8": true}, + "6": {"1": true, "4": true, "7": true}, + "7": {"1": true}, + "8": {"5": true, "3": true}, + "9": {"8": true}, + }, + want: want{sccs: []map[string]bool{ + {"1": true, "2": true, "4": true, "6": true, "7": true}, + {"3": true, "5": true, "9": true, "8": true}, + }}, + }, + } + + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + vertices := make([]string, 0, len(testCase.graph)) + for k := range testCase.graph { + vertices = append(vertices, k) + } + require.ElementsMatch(t, builder.StronglyConnectedComponents( + vertices, testCase.graph), testCase.want.sccs) + }) + } +} + +func TestFindCyclesInSCC(t *testing.T) { //nolint:funlen + t.Parallel() + + type want struct { + paths [][]string + } + + tests := []struct { + name string + graph map[string]map[string]bool + scc map[string]bool + start string + want want + }{ + { + name: "Wiki 1 1", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": true, "7": true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + scc: map[string]bool{"2": true, "3": true, "1": true}, + start: "3", + want: want{paths: [][]string{{"3", "1", "2", "3"}}}, + }, + { + name: "Wiki 1 2", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true}, + "4": {"2": true, "3": true, "6": true}, + "5": {"3": true, "7": 
true}, + "6": {"4": true, "5": true}, + "7": {"5": true}, + "8": {"6": true, "7": true, "8": true}, + }, + scc: map[string]bool{"5": true, "7": true}, + start: "5", + want: want{paths: [][]string{{"5", "7", "5"}}}, + }, + { + name: "Wiki 2", + graph: map[string]map[string]bool{ + "1": {"2": true, "6": true}, + "2": {"6": true, "4": true}, + "3": {"9": true, "4": true, "8": true}, + "4": {"1": true, "7": true}, + "5": {"9": true, "8": true}, + "6": {"1": true, "4": true, "7": true}, + "7": {"1": true}, + "8": {"5": true, "3": true}, + "9": {"8": true}, + }, + scc: map[string]bool{ + "1": true, "2": true, "4": true, "6": true, "7": true, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "6", "1"}, + {"1", "2", "6", "4", "1"}, + {"1", "2", "6", "4", "7", "1"}, + {"1", "2", "6", "7", "1"}, + {"1", "2", "4", "1"}, + {"1", "2", "4", "7", "1"}, + {"1", "6", "1"}, + {"1", "6", "7", "1"}, + {"1", "6", "4", "7", "1"}, + {"1", "6", "4", "1"}, + }}, + }, + { + name: "loop in loop", + graph: map[string]map[string]bool{ + "1": {"2": true}, + "2": {"3": true}, + "3": {"1": true, "2": true}, + }, + scc: map[string]bool{ + "1": true, "2": true, "3": true, + }, + start: "1", + want: want{paths: [][]string{ + {"1", "2", "3", "1"}, + {"1", "2", "3", "2"}, + }}, + }, + } + for _, testCase := range tests { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + require.ElementsMatch(t, builder.FindCyclesInSCC( + testCase.graph, testCase.scc, testCase.start), + testCase.want.paths) + }) + } +} diff --git a/go.mod b/go.mod index b260a3d0..5cdce67a 100644 --- a/go.mod +++ b/go.mod @@ -7,4 +7,10 @@ require golang.org/x/tools v0.10.0 require ( golang.org/x/mod v0.11.0 // indirect golang.org/x/sys v0.9.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect + github.com/stretchr/testify v1.8.4 // indirect + golang.org/x/lint 
v0.0.0-20210508222113-6edffad5e616 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 1b85af0a..3fe54c21 100644 --- a/go.sum +++ b/go.sum @@ -4,3 +4,30 @@ golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg= golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint 
v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= + diff --git a/main.go b/main.go index e1d3423c..8f13bf77 100644 --- a/main.go +++ b/main.go @@ -48,6 +48,7 @@ func main() { outputFlag = fs.String("o", "", "output file, defaults to stdout") optimizeBasicLatinFlag = fs.Bool("optimize-basic-latin", false, "generate optimized parser for Unicode Basic Latin character sets") optimizeGrammar = fs.Bool("optimize-grammar", false, "optimize the given grammar (EXPERIMENTAL FEATURE)") + ignoreLeftRecursion = fs.Bool("ignore-left-recursion", false, "ignore errors related to left recursion") optimizeParserFlag = fs.Bool("optimize-parser", false, "generate optimized parser without Debug and Memoize options") recvrNmFlag = fs.String("receiver-name", "c", "receiver name for the generated methods") noBuildFlag = fs.Bool("x", false, "do not build, only parse") @@ -136,7 +137,10 @@ func main() { optimizeParser := builder.Optimize(*optimizeParserFlag) basicLatinOptimize := builder.BasicLatinLookupTable(*optimizeBasicLatinFlag) nolintOpt := builder.Nolint(*nolint) - if err 
:= builder.BuildParser(outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, nolintOpt); err != nil { + leftRecursionIgnorer := builder.IgnoreLeftRecursion(*ignoreLeftRecursion) + if err := builder.BuildParser( + outBuf, grammar, curNmOpt, optimizeParser, basicLatinOptimize, + nolintOpt, leftRecursionIgnorer); err != nil { fmt.Fprintln(os.Stderr, "build error: ", err) exit(5) }