Skip to content

Commit

Permalink
Optimizing regexp calls to improve performance (qax-os#1532)
Browse files Browse the repository at this point in the history
  • Loading branch information
sillydong authored and xuri committed Jul 11, 2023
1 parent e4ad7b5 commit cb5aacd
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 34 deletions.
50 changes: 23 additions & 27 deletions calc.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,24 @@ var (
return fmt.Sprintf("R[%d]C[%d]", row, col), nil
},
}
// formularFormats lists the anchored patterns that formulaCriteriaParser
// tries, in slice order, against a criteria expression. The first pattern
// that matches wins and its single capture group becomes the condition.
// The two-character operators (<>, <=, >=) are listed before the
// single-character < and > so that the shorter patterns cannot claim them
// first. Entries are paired by index with formularCriterias, so the two
// slices must keep the same length and order.
formularFormats = []*regexp.Regexp{
// Expression made up of digits only.
regexp.MustCompile(`^(\d+)$`),
// Leading "=" followed by the condition.
regexp.MustCompile(`^=(.*)$`),
// Leading "<>" followed by the condition.
regexp.MustCompile(`^<>(.*)$`),
// Leading "<=" followed by the condition.
regexp.MustCompile(`^<=(.*)$`),
// Leading ">=" followed by the condition.
regexp.MustCompile(`^>=(.*)$`),
// Leading "<" followed by the condition.
regexp.MustCompile(`^<(.*)$`),
// Leading ">" followed by the condition.
regexp.MustCompile(`^>(.*)$`),
}
// formularCriterias holds the criteria type assigned by
// formulaCriteriaParser when the pattern at the same index in
// formularFormats matches. The first two entries are both criteriaEq because
// a digits-only expression and an expression with a leading "=" are both
// treated as equality.
formularCriterias = []byte{
criteriaEq, // digits only
criteriaEq, // "=" prefix
criteriaNe, // "<>" prefix
criteriaLe, // "<=" prefix
criteriaGe, // ">=" prefix
criteriaL, // "<" prefix
criteriaG, // ">" prefix
}
)

// calcContext defines the formula execution context.
Expand Down Expand Up @@ -1654,33 +1672,11 @@ func formulaCriteriaParser(exp string) (fc *formulaCriteria) {
if exp == "" {
return
}
if match := regexp.MustCompile(`^(\d+)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaEq, match[1]
return
}
if match := regexp.MustCompile(`^=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaEq, match[1]
return
}
if match := regexp.MustCompile(`^<>(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaNe, match[1]
return
}
if match := regexp.MustCompile(`^<=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaLe, match[1]
return
}
if match := regexp.MustCompile(`^>=(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaGe, match[1]
return
}
if match := regexp.MustCompile(`^<(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaL, match[1]
return
}
if match := regexp.MustCompile(`^>(.*)$`).FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = criteriaG, match[1]
return
for i, re := range formularFormats {
if match := re.FindStringSubmatch(exp); len(match) > 1 {
fc.Type, fc.Condition = formularCriterias[i], match[1]
return
}
}
if strings.Contains(exp, "?") {
exp = strings.ReplaceAll(exp, "?", ".")
Expand Down
2 changes: 1 addition & 1 deletion sheet.go
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,7 @@ func (f *File) searchSheet(name, value string, regSearch bool) (result []string,
if sst, err = f.sharedStringsReader(); err != nil {
return
}
regex := regexp.MustCompile(value)
decoder := f.xmlNewDecoder(bytes.NewReader(f.readBytes(name)))
for {
var token xml.Token
Expand All @@ -1001,7 +1002,6 @@ func (f *File) searchSheet(name, value string, regSearch bool) (result []string,
_ = decoder.DecodeElement(&colCell, &xmlElement)
val, _ := colCell.getValueFrom(f, sst, false)
if regSearch {
regex := regexp.MustCompile(value)
if !regex.MatchString(val) {
continue
}
Expand Down
17 changes: 11 additions & 6 deletions table.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ import (
"unicode/utf8"
)

// The regular expressions below are used while parsing auto filter
// expressions. They are compiled once at package initialization so the
// parsing functions do not recompile them on every call.

// expressionFormat splits a filter expression into tokens. A token is either
// a double-quoted string, inside which a doubled quote ("") is accepted, or a
// run of non-whitespace characters.
var expressionFormat = regexp.MustCompile(`"(?:[^"]|"")*"|\S+`)

// conditionFormat matches a token that contains "or" or "||". The pattern is
// not anchored, so it matches anywhere inside the token.
var conditionFormat = regexp.MustCompile(`(or|\|\|)`)

// blankFormat matches a token that contains "blanks" or "nonblanks". The
// pattern is not anchored; callers lower-case the token before matching.
var blankFormat = regexp.MustCompile(`blanks|nonblanks`)

// matchFormat matches a token that contains one of the Excel match
// characters "*" or "?".
var matchFormat = regexp.MustCompile(`[*?]`)

// parseTableOptions provides a function to parse the format settings of the
// table with default value.
func parseTableOptions(opts *Table) (*Table, error) {
Expand Down Expand Up @@ -400,8 +407,7 @@ func (f *File) autoFilter(sheet, ref string, columns, col int, opts []AutoFilter
return fmt.Errorf("incorrect index of column '%s'", opt.Column)
}
fc := &xlsxFilterColumn{ColID: offset}
re := regexp.MustCompile(`"(?:[^"]|"")*"|\S+`)
token := re.FindAllString(opt.Expression, -1)
token := expressionFormat.FindAllString(opt.Expression, -1)
if len(token) != 3 && len(token) != 7 {
return fmt.Errorf("incorrect number of tokens in criteria '%s'", opt.Expression)
}
Expand Down Expand Up @@ -484,8 +490,7 @@ func (f *File) parseFilterExpression(expression string, tokens []string) ([]int,
// expressions).
conditional := 0
c := tokens[3]
re, _ := regexp.Match(`(or|\|\|)`, []byte(c))
if re {
if conditionFormat.Match([]byte(c)) {
conditional = 1
}
expression1, token1, err := f.parseFilterTokens(expression, tokens[:3])
Expand Down Expand Up @@ -533,7 +538,7 @@ func (f *File) parseFilterTokens(expression string, tokens []string) ([]int, str
}
token := tokens[2]
// Special handling for Blanks/NonBlanks.
re, _ := regexp.Match("blanks|nonblanks", []byte(strings.ToLower(token)))
re := blankFormat.Match([]byte(strings.ToLower(token)))
if re {
// Only allow Equals or NotEqual in this context.
if operator != 2 && operator != 5 {
Expand All @@ -558,7 +563,7 @@ func (f *File) parseFilterTokens(expression string, tokens []string) ([]int, str
}
// If the string token contains an Excel match character then change the
// operator type to indicate a non "simple" equality.
re, _ = regexp.Match("[*?]", []byte(token))
re = matchFormat.Match([]byte(token))
if operator == 2 && re {
operator = 22
}
Expand Down

0 comments on commit cb5aacd

Please sign in to comment.