-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.go
140 lines (124 loc) · 2.93 KB
/
regex.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package goabnf
import (
"fmt"
"strings"
)
// Regex returns, as a string, a regular expression built from the rule
// named rulename.
//
// The rule must not contain a cycle in its dependency graph: a cyclic
// rule cannot be expanded into a finite expression, so an *ErrCyclicRule
// carrying the rulename is returned instead. An error reported by the
// cycle check itself is returned unchanged.
//
// The produced expression is not optimised; a hand-written one will
// usually perform better.
func (g *Grammar) Regex(rulename string) (string, error) {
	// Refuse to generate when expansion would recurse forever.
	cyclic, err := g.RuleContainsCycle(rulename)
	switch {
	case err != nil:
		return "", err
	case cyclic:
		return "", &ErrCyclicRule{Rulename: rulename}
	}

	return GetRule(rulename, g.Rulemap).regex(g)
}
// regex returns the regular expression of the rule, which is the
// expression of its alternation. g is passed through so that nested
// rulename references can be resolved.
func (r Rule) regex(g *Grammar) (string, error) {
	alt := r.Alternation
	return alt.regex(g)
}
// regex returns the alternation as a parenthesised group whose branches,
// one per concatenation, are separated by "|".
//
// The first error returned by a concatenation aborts the generation and
// is returned as is, with an empty string.
func (alt Alternation) regex(g *Grammar) (string, error) {
	branches := make([]string, 0, len(alt.Concatenations))
	for _, cnt := range alt.Concatenations {
		reg, err := cnt.regex(g)
		if err != nil {
			return "", err
		}
		branches = append(branches, reg)
	}
	// Join places a separator only between branches, so an empty branch
	// is kept as an alternative instead of being trimmed away together
	// with its separator.
	return "(" + strings.Join(branches, "|") + ")", nil
}
// regex returns the expressions of the repetitions, in order, each one
// wrapped in its own group.
//
// The first error returned by a repetition aborts the generation and is
// returned as is, with an empty string.
func (cnt Concatenation) regex(g *Grammar) (string, error) {
	var sb strings.Builder
	for _, rep := range cnt.Repetitions {
		part, err := rep.regex(g)
		if err != nil {
			return "", err
		}
		sb.WriteString("(")
		sb.WriteString(part)
		sb.WriteString(")")
	}
	return sb.String(), nil
}
// regex returns the expression of the repeated element, wrapped in a
// group and followed by the quantifier matching rep.Min and rep.Max:
//
//	Min == Max        -> {Min}
//	Min == 0, no max  -> *
//	Min  > 0, no max  -> {Min,}
//	otherwise         -> {Min,Max}
//
// "No max" means rep.Max == inf. An error from the element is returned as
// is, with an empty string.
func (rep Repetition) regex(g *Grammar) (string, error) {
	reg, err := rep.Element.regex(g)
	if err != nil {
		return "", err
	}

	var quant string
	switch {
	case rep.Min == rep.Max:
		quant = fmt.Sprintf("{%d}", rep.Min)
	case rep.Max == inf:
		if rep.Min == 0 {
			quant = "*"
		} else {
			quant = fmt.Sprintf("{%d,}", rep.Min)
		}
	default:
		// Covers both "0 to Max" and "Min to Max". The lower bound is
		// always written out because Go's regexp syntax has no "{,m}"
		// form: it would be matched as literal text.
		quant = fmt.Sprintf("{%d,%d}", rep.Min, rep.Max)
	}

	return "(" + reg + ")" + quant, nil
}
// regex returns the expression of the rule referenced by e.Name, looked
// up with GetRule in the rulemap of g.
func (e ElemRulename) regex(g *Grammar) (string, error) {
	return GetRule(e.Name, g.Rulemap).regex(g)
}
// regex returns the expression of the grouped alternation wrapped in an
// additional pair of parentheses, or the error of that alternation.
func (e ElemGroup) regex(g *Grammar) (string, error) {
	inner, err := e.Alternation.regex(g)
	if err != nil {
		return "", err
	}
	grouped := "(" + inner + ")"
	return grouped, nil
}
// regex returns the expression of the optional alternation as a group
// followed by "?", i.e. matched zero or one time, or the error of that
// alternation.
func (e ElemOption) regex(g *Grammar) (string, error) {
	inner, err := e.Alternation.regex(g)
	if err != nil {
		return "", err
	}
	optional := "(" + inner + ")?"
	return optional, nil
}
// regex never produces an expression for a prose value: it always
// returns an empty string along with ErrHandlingProseVal.
func (e ElemProseVal) regex(g *Grammar) (string, error) {
	const noRegex = ""
	return noRegex, ErrHandlingProseVal
}
// regex returns the expression matching a numeric value.
//
// For StatRange, e.Elems[0] and e.Elems[1] are the inclusive bounds and
// the result is a character class listing every value between them. For
// StatSeries, the values are emitted one after the other so they match as
// a sequence; this takes StatSeries to be the dotted form of RFC 5234
// (e.g. %d13.10 for CR LF), which denotes a concatenation and not a choice.
//
// An error is returned when a range does not have two bounds or when the
// status is neither StatRange nor StatSeries.
func (e ElemNumVal) regex(g *Grammar) (string, error) {
	var sb strings.Builder
	switch e.Status {
	case StatRange:
		if len(e.Elems) < 2 {
			return "", fmt.Errorf("num-val range needs 2 bounds, got %d", len(e.Elems))
		}
		lo, hi := atob(e.Elems[0], e.Base), atob(e.Elems[1], e.Base)
		sb.WriteString("[")
		// Count with an int: a byte counter wraps from 255 back to 0, so
		// a range ending at 255 would never leave the loop.
		for v := int(lo); v <= int(hi); v++ {
			sb.WriteString(regescape(byte(v)))
		}
		sb.WriteString("]")
	case StatSeries:
		for _, el := range e.Elems {
			sb.WriteString(regescape(atob(el, e.Base)))
		}
	default:
		return "", fmt.Errorf("unsupported num-val status %v", e.Status)
	}
	return sb.String(), nil
}
// regex returns the values of e, in order, each one passed through
// regescape so that it is matched literally.
//
// NOTE(review): the values are emitted verbatim, so the match is
// case-sensitive. RFC 5234 quoted strings are case-insensitive unless
// stated otherwise; confirm whether this is handled elsewhere.
func (e ElemCharVal) regex(g *Grammar) (string, error) {
	var sb strings.Builder
	for _, v := range e.Values {
		sb.WriteString(regescape(v))
	}
	return sb.String(), nil
}
// regescape returns the regular-expression fragment that matches b
// literally.
//
// ASCII letters and digits are returned unchanged, and any other ASCII
// byte is prefixed with a backslash. Values of 0x80 and above are written
// as a two-digit \xHH escape, because Go's regexp compiler only accepts a
// backslash in front of ASCII punctuation and rejects it in front of a
// non-ASCII character.
func regescape(b byte) string {
	switch {
	case b >= 'A' && b <= 'Z', b >= 'a' && b <= 'z', b >= '0' && b <= '9':
		// Common character, no escaping needed.
		return string(b)
	case b >= 0x80:
		return fmt.Sprintf(`\x%02x`, b)
	}
	// Escape by default, which is always valid for non-alphanumeric ASCII.
	return `\` + string(b)
}