Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move a specific regex to static variable #346

Merged
merged 5 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions internal/cachedregexp/regex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package cachedregexp

import (
"regexp"
"sync"
)

var cache sync.Map

func MustCompile(exp string) *regexp.Regexp {
compiled, ok := cache.Load(exp)
if !ok {
compiled, _ = cache.LoadOrStore(exp, regexp.MustCompile(exp))
}

return compiled.(*regexp.Regexp)
}
7 changes: 4 additions & 3 deletions internal/semantic/version-maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package semantic

import (
"fmt"
"regexp"
"sort"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

type mavenVersionToken struct {
Expand Down Expand Up @@ -175,11 +176,11 @@ func (mv MavenVersion) lessThan(mw MavenVersion) bool {
// According to Maven's implementation, any non-digit is a "character":
// https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L627
func mavenFindTransitions(token string) (ints []int) {
for _, span := range regexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

for _, span := range regexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

Expand Down
9 changes: 5 additions & 4 deletions internal/semantic/version-packagist.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package semantic

import (
"regexp"
"strconv"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

func canonicalizePackagistVersion(v string) string {
Expand All @@ -15,9 +16,9 @@ func canonicalizePackagistVersion(v string) string {
// the trimming...)
v = strings.TrimPrefix(strings.TrimPrefix(v, "v"), "V")

v = regexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = regexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = regexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = cachedregexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")

return v
}
Expand Down
11 changes: 6 additions & 5 deletions internal/semantic/version-pypi.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package semantic
import (
"fmt"
"math/big"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

type PyPIVersion struct {
Expand Down Expand Up @@ -67,7 +68,7 @@ func parseLetterVersion(letter, number string) letterAndNumber {
}

func parseLocalVersion(local string) (parts []string) {
for _, part := range regexp.MustCompile(`[._-]`).Split(local, -1) {
for _, part := range cachedregexp.MustCompile(`[._-]`).Split(local, -1) {
parts = append(parts, strings.ToLower(part))
}

Expand All @@ -88,7 +89,7 @@ func normalizePyPILegacyPart(part string) string {
part = "@"
}

if regexp.MustCompile(`\d`).MatchString(part[:1]) {
if cachedregexp.MustCompile(`\d`).MatchString(part[:1]) {
// pad for numeric comparison
return fmt.Sprintf("%08s", part)
}
Expand All @@ -97,7 +98,7 @@ func normalizePyPILegacyPart(part string) string {
}

func parsePyPIVersionParts(str string) (parts []string) {
re := regexp.MustCompile(`(\d+|[a-z]+|\.|-)`)
re := cachedregexp.MustCompile(`(\d+|[a-z]+|\.|-)`)

splits := re.FindAllString(str, -1)
splits = append(splits, "final")
Expand Down Expand Up @@ -137,7 +138,7 @@ func parsePyPIVersion(str string) PyPIVersion {
str = strings.ToLower(str)

// from https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions
re := regexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
re := cachedregexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
match := re.FindStringSubmatch(str)

if len(match) == 0 {
Expand Down
5 changes: 3 additions & 2 deletions internal/semantic/version-semver-like.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package semantic
import (
"fmt"
"math/big"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

// SemverLikeVersion is a version that is _like_ a version as defined by the
Expand Down Expand Up @@ -55,7 +56,7 @@ func parseSemverLike(line string) SemverLikeVersion {
var components []*big.Int
originStr := line

numberReg := regexp.MustCompile(`\d`)
numberReg := cachedregexp.MustCompile(`\d`)

currentCom := ""
foundBuild := false
Expand Down
5 changes: 3 additions & 2 deletions pkg/lockfile/dpkg-status.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ import (
"bufio"
"fmt"
"os"
"regexp"
"sort"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

const DebianEcosystem Ecosystem = "Debian"
Expand Down Expand Up @@ -38,7 +39,7 @@ func groupDpkgPackageLines(scanner *bufio.Scanner) [][]string {
// Return name and version if "Source" field contains them
func parseSourceField(source string) (string, string) {
// Pattern: name (version)
re := regexp.MustCompile(`^(.*)\((.*)\)`)
re := cachedregexp.MustCompile(`^(.*)\((.*)\)`)
matches := re.FindStringSubmatch(source)
if len(matches) == 3 {
return strings.TrimSpace(matches[1]), strings.TrimSpace(matches[2])
Expand Down
13 changes: 7 additions & 6 deletions pkg/lockfile/parse-gemfile-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"fmt"
"log"
"os"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

const BundlerEcosystem Ecosystem = "RubyGems"
Expand Down Expand Up @@ -55,8 +56,8 @@ func (parser *gemfileLockfileParser) addDependency(name string, version string)
}

func (parser *gemfileLockfileParser) parseSpec(line string) {
// nameVersionReg := regexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := regexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
// nameVersionReg := cachedregexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := cachedregexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)

results := nameVersionReg.FindStringSubmatch(line)

Expand All @@ -82,7 +83,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

// OPTIONS = /^ ([a-z]+): (.*)$/i.freeze
optionsRegexp := regexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)
optionsRegexp := cachedregexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)

// todo: support
options := optionsRegexp.FindStringSubmatch(line)
Expand All @@ -105,7 +106,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

func isNotIndented(line string) bool {
re := regexp.MustCompile(`^\S`)
re := cachedregexp.MustCompile(`^\S`)

return re.MatchString(line)
}
Expand All @@ -127,7 +128,7 @@ func (parser *gemfileLockfileParser) parseLineBasedOnState(line string) {
}

func (parser *gemfileLockfileParser) parse(contents string) {
lineMatcher := regexp.MustCompile(`(?:\r?\n)+`)
lineMatcher := cachedregexp.MustCompile(`(?:\r?\n)+`)

lines := lineMatcher.Split(contents, -1)

Expand Down
7 changes: 4 additions & 3 deletions pkg/lockfile/parse-maven-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import (
"encoding/xml"
"fmt"
"os"
"regexp"

"github.com/google/osv-scanner/internal/cachedregexp"
)

type MavenLockDependency struct {
Expand All @@ -15,7 +16,7 @@ type MavenLockDependency struct {
}

func (mld MavenLockDependency) parseResolvedVersion(version string) string {
versionRequirementReg := regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
versionRequirementReg := cachedregexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)

results := versionRequirementReg.FindStringSubmatch(version)

Expand All @@ -27,7 +28,7 @@ func (mld MavenLockDependency) parseResolvedVersion(version string) string {
}

func (mld MavenLockDependency) resolveVersionValue(lockfile MavenLockFile) string {
interpolationReg := regexp.MustCompile(`\${(.+)}`)
interpolationReg := cachedregexp.MustCompile(`\${(.+)}`)

results := interpolationReg.FindStringSubmatch(mld.Version)

Expand Down
5 changes: 3 additions & 2 deletions pkg/lockfile/parse-mix-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import (
"bufio"
"fmt"
"os"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

const MixEcosystem Ecosystem = "Hex"
Expand All @@ -17,7 +18,7 @@ func ParseMixLock(pathToLockfile string) ([]PackageDetails, error) {
}
defer file.Close()

re := regexp.MustCompile(`^ +"(\w+)": \{.+,$`)
re := cachedregexp.MustCompile(`^ +"(\w+)": \{.+,$`)

scanner := bufio.NewScanner(file)

Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-pnpm-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ package lockfile
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
"gopkg.in/yaml.v3"
)

Expand Down Expand Up @@ -55,7 +55,7 @@ func (l *PnpmLockfile) UnmarshalYAML(unmarshal func(interface{}) error) error {
const PnpmEcosystem = NpmEcosystem

func startsWithNumber(str string) bool {
matcher := regexp.MustCompile(`^\d`)
matcher := cachedregexp.MustCompile(`^\d`)

return matcher.MatchString(str)
}
Expand Down Expand Up @@ -108,7 +108,7 @@ func extractPnpmPackageNameAndVersion(dependencyPath string) (string, string) {

func parseNameAtVersion(value string) (name string, version string) {
// look for pattern "name@version", where name is allowed to contain zero or more "@"
matches := regexp.MustCompile(`^(.+)@([\d.]+)$`).FindStringSubmatch(value)
matches := cachedregexp.MustCompile(`^(.+)@([\d.]+)$`).FindStringSubmatch(value)

if len(matches) != 3 {
return name, ""
Expand Down Expand Up @@ -142,7 +142,7 @@ func parsePnpmLock(lockfile PnpmLockfile) []PackageDetails {
commit := pkg.Resolution.Commit

if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") {
re := regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
re := cachedregexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
matched := re.FindStringSubmatch(pkg.Resolution.Tarball)

if matched != nil {
Expand Down
7 changes: 4 additions & 3 deletions pkg/lockfile/parse-requirements-txt.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

const PipEcosystem Ecosystem = "PyPI"
Expand Down Expand Up @@ -67,15 +68,15 @@ func parseLine(line string) PackageDetails {
// than false negatives, and can be dealt with when/if it actually happens.
func normalizedRequirementName(name string) string {
// per https://www.python.org/dev/peps/pep-0503/#normalized-names
name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = strings.ToLower(name)
name = strings.Split(name, "[")[0]

return name
}

func removeComments(line string) string {
var re = regexp.MustCompile(`(^|\s+)#.*$`)
var re = cachedregexp.MustCompile(`(^|\s+)#.*$`)

return strings.TrimSpace(re.ReplaceAllString(line, ""))
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/lockfile/parse-yarn-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ import (
"fmt"
"net/url"
"os"
"regexp"
"strings"

"github.com/google/osv-scanner/internal/cachedregexp"
)

const YarnEcosystem = NpmEcosystem
Expand Down Expand Up @@ -63,7 +64,7 @@ func extractYarnPackageName(str string) string {
}

func determineYarnPackageVersion(group []string) string {
re := regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)
re := cachedregexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand All @@ -78,7 +79,7 @@ func determineYarnPackageVersion(group []string) string {
}

func determineYarnPackageResolution(group []string) string {
re := regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
re := cachedregexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand Down Expand Up @@ -111,7 +112,7 @@ func tryExtractCommit(resolution string) string {
}

for _, matcher := range matchers {
re := regexp.MustCompile(matcher)
re := cachedregexp.MustCompile(matcher)
matched := re.FindStringSubmatch(resolution)

if matched != nil {
Expand Down