diff --git a/internal/cachedregexp/regex.go b/internal/cachedregexp/regex.go
new file mode 100644
index 0000000000..59ebd36a19
--- /dev/null
+++ b/internal/cachedregexp/regex.go
@@ -0,0 +1,17 @@
+package cachedregexp
+
+import (
+	"regexp"
+	"sync"
+)
+
+var cache sync.Map
+
+func MustCompile(exp string) *regexp.Regexp {
+	compiled, ok := cache.Load(exp)
+	if !ok {
+		compiled, _ = cache.LoadOrStore(exp, regexp.MustCompile(exp))
+	}
+
+	return compiled.(*regexp.Regexp)
+}
diff --git a/internal/semantic/version-maven.go b/internal/semantic/version-maven.go
index 9cf5c9e53f..ec8d26ecf9 100644
--- a/internal/semantic/version-maven.go
+++ b/internal/semantic/version-maven.go
@@ -2,9 +2,10 @@ package semantic
 
 import (
 	"fmt"
-	"regexp"
 	"sort"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 type mavenVersionToken struct {
@@ -175,11 +176,11 @@ func (mv MavenVersion) lessThan(mw MavenVersion) bool {
 // According to Maven's implementation, any non-digit is a "character":
 // https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L627
 func mavenFindTransitions(token string) (ints []int) {
-	for _, span := range regexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
+	for _, span := range cachedregexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
 		ints = append(ints, span[0]+1)
 	}
 
-	for _, span := range regexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
+	for _, span := range cachedregexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
 		ints = append(ints, span[0]+1)
 	}
 
diff --git a/internal/semantic/version-packagist.go b/internal/semantic/version-packagist.go
index 1dad01ac31..ef5d9ae99e 100644
--- a/internal/semantic/version-packagist.go
+++ b/internal/semantic/version-packagist.go
@@ -1,9 +1,10 @@
 package semantic
 
 import (
-	"regexp"
 	"strconv"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 func canonicalizePackagistVersion(v string) string {
@@ -15,9 +16,9 @@ func canonicalizePackagistVersion(v string) string {
 	// the trimming...)
 	v = strings.TrimPrefix(strings.TrimPrefix(v, "v"), "V")
 
-	v = regexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
-	v = regexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
-	v = regexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")
+	v = cachedregexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
+	v = cachedregexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
+	v = cachedregexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")
 
 	return v
 }
diff --git a/internal/semantic/version-pypi.go b/internal/semantic/version-pypi.go
index 885aaa5229..7ca8548076 100644
--- a/internal/semantic/version-pypi.go
+++ b/internal/semantic/version-pypi.go
@@ -3,8 +3,9 @@ package semantic
 import (
 	"fmt"
 	"math/big"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 type PyPIVersion struct {
@@ -67,7 +68,7 @@ func parseLetterVersion(letter, number string) letterAndNumber {
 }
 
 func parseLocalVersion(local string) (parts []string) {
-	for _, part := range regexp.MustCompile(`[._-]`).Split(local, -1) {
+	for _, part := range cachedregexp.MustCompile(`[._-]`).Split(local, -1) {
 		parts = append(parts, strings.ToLower(part))
 	}
 
@@ -88,7 +89,7 @@ func normalizePyPILegacyPart(part string) string {
 		part = "@"
 	}
 
-	if regexp.MustCompile(`\d`).MatchString(part[:1]) {
+	if cachedregexp.MustCompile(`\d`).MatchString(part[:1]) {
 		// pad for numeric comparison
 		return fmt.Sprintf("%08s", part)
 	}
@@ -97,7 +98,7 @@ func normalizePyPILegacyPart(part string) string {
 }
 
 func parsePyPIVersionParts(str string) (parts []string) {
-	re := regexp.MustCompile(`(\d+|[a-z]+|\.|-)`)
+	re := cachedregexp.MustCompile(`(\d+|[a-z]+|\.|-)`)
 
 	splits := re.FindAllString(str, -1)
 	splits = append(splits, "final")
@@ -137,7 +138,7 @@ func parsePyPIVersion(str string) PyPIVersion {
 	str = strings.ToLower(str)
 
 	// from https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions
-	re := regexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
+	re := cachedregexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
 	match := re.FindStringSubmatch(str)
 
 	if len(match) == 0 {
diff --git a/internal/semantic/version-semver-like.go b/internal/semantic/version-semver-like.go
index d9480192d0..fbb63bc689 100644
--- a/internal/semantic/version-semver-like.go
+++ b/internal/semantic/version-semver-like.go
@@ -3,8 +3,9 @@ package semantic
 import (
 	"fmt"
 	"math/big"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 // SemverLikeVersion is a version that is _like_ a version as defined by the
@@ -55,7 +56,7 @@ func parseSemverLike(line string) SemverLikeVersion {
 	var components []*big.Int
 	originStr := line
 
-	numberReg := regexp.MustCompile(`\d`)
+	numberReg := cachedregexp.MustCompile(`\d`)
 
 	currentCom := ""
 	foundBuild := false
diff --git a/pkg/lockfile/dpkg-status.go b/pkg/lockfile/dpkg-status.go
index 41cc872d6c..77cf62be48 100644
--- a/pkg/lockfile/dpkg-status.go
+++ b/pkg/lockfile/dpkg-status.go
@@ -4,9 +4,10 @@ import (
 	"bufio"
 	"fmt"
 	"os"
-	"regexp"
 	"sort"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 const DebianEcosystem Ecosystem = "Debian"
@@ -38,7 +39,7 @@ func groupDpkgPackageLines(scanner *bufio.Scanner) [][]string {
 // Return name and version if "Source" field contains them
 func parseSourceField(source string) (string, string) {
 	// Pattern: name (version)
-	re := regexp.MustCompile(`^(.*)\((.*)\)`)
+	re := cachedregexp.MustCompile(`^(.*)\((.*)\)`)
 	matches := re.FindStringSubmatch(source)
 	if len(matches) == 3 {
 		return strings.TrimSpace(matches[1]), strings.TrimSpace(matches[2])
diff --git a/pkg/lockfile/parse-gemfile-lock.go b/pkg/lockfile/parse-gemfile-lock.go
index e03efbf247..c3409466b6 100644
--- a/pkg/lockfile/parse-gemfile-lock.go
+++ b/pkg/lockfile/parse-gemfile-lock.go
@@ -4,8 +4,9 @@ import (
 	"fmt"
 	"log"
 	"os"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 const BundlerEcosystem Ecosystem = "RubyGems"
@@ -55,8 +56,8 @@ func (parser *gemfileLockfileParser) addDependency(name string, version string)
 }
 
 func (parser *gemfileLockfileParser) parseSpec(line string) {
-	// nameVersionReg := regexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
-	nameVersionReg := regexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
+	// nameVersionReg := cachedregexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
+	nameVersionReg := cachedregexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
 
 	results := nameVersionReg.FindStringSubmatch(line)
 
@@ -82,7 +83,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
 	}
 
 	// OPTIONS      = /^  ([a-z]+): (.*)$/i.freeze
-	optionsRegexp := regexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)
+	optionsRegexp := cachedregexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)
 
 	// todo: support
 	options := optionsRegexp.FindStringSubmatch(line)
@@ -105,7 +106,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
 }
 
 func isNotIndented(line string) bool {
-	re := regexp.MustCompile(`^\S`)
+	re := cachedregexp.MustCompile(`^\S`)
 
 	return re.MatchString(line)
 }
@@ -127,7 +128,7 @@ func (parser *gemfileLockfileParser) parseLineBasedOnState(line string) {
 }
 
 func (parser *gemfileLockfileParser) parse(contents string) {
-	lineMatcher := regexp.MustCompile(`(?:\r?\n)+`)
+	lineMatcher := cachedregexp.MustCompile(`(?:\r?\n)+`)
 
 	lines := lineMatcher.Split(contents, -1)
 
diff --git a/pkg/lockfile/parse-maven-lock.go b/pkg/lockfile/parse-maven-lock.go
index acb402f0f7..e0d9608fa3 100644
--- a/pkg/lockfile/parse-maven-lock.go
+++ b/pkg/lockfile/parse-maven-lock.go
@@ -4,7 +4,8 @@ import (
 	"encoding/xml"
 	"fmt"
 	"os"
-	"regexp"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 type MavenLockDependency struct {
@@ -15,7 +16,7 @@ type MavenLockDependency struct {
 }
 
 func (mld MavenLockDependency) parseResolvedVersion(version string) string {
-	versionRequirementReg := regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
+	versionRequirementReg := cachedregexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
 
 	results := versionRequirementReg.FindStringSubmatch(version)
 
@@ -27,7 +28,7 @@ func (mld MavenLockDependency) parseResolvedVersion(version string) string {
 }
 
 func (mld MavenLockDependency) resolveVersionValue(lockfile MavenLockFile) string {
-	interpolationReg := regexp.MustCompile(`\${(.+)}`)
+	interpolationReg := cachedregexp.MustCompile(`\${(.+)}`)
 
 	results := interpolationReg.FindStringSubmatch(mld.Version)
 
diff --git a/pkg/lockfile/parse-mix-lock.go b/pkg/lockfile/parse-mix-lock.go
index dc1eeb741e..e0769914bd 100644
--- a/pkg/lockfile/parse-mix-lock.go
+++ b/pkg/lockfile/parse-mix-lock.go
@@ -4,8 +4,9 @@ import (
 	"bufio"
 	"fmt"
 	"os"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 const MixEcosystem Ecosystem = "Hex"
@@ -17,7 +18,7 @@ func ParseMixLock(pathToLockfile string) ([]PackageDetails, error) {
 	}
 	defer file.Close()
 
-	re := regexp.MustCompile(`^ +"(\w+)": \{.+,$`)
+	re := cachedregexp.MustCompile(`^ +"(\w+)": \{.+,$`)
 
 	scanner := bufio.NewScanner(file)
 
diff --git a/pkg/lockfile/parse-pnpm-lock.go b/pkg/lockfile/parse-pnpm-lock.go
index 37a6331252..4e3679463c 100644
--- a/pkg/lockfile/parse-pnpm-lock.go
+++ b/pkg/lockfile/parse-pnpm-lock.go
@@ -3,10 +3,10 @@ package lockfile
 import (
 	"fmt"
 	"os"
-	"regexp"
 	"strconv"
 	"strings"
 
+	"github.com/google/osv-scanner/internal/cachedregexp"
 	"gopkg.in/yaml.v3"
 )
 
@@ -55,7 +55,7 @@ func (l *PnpmLockfile) UnmarshalYAML(unmarshal func(interface{}) error) error {
 const PnpmEcosystem = NpmEcosystem
 
 func startsWithNumber(str string) bool {
-	matcher := regexp.MustCompile(`^\d`)
+	matcher := cachedregexp.MustCompile(`^\d`)
 
 	return matcher.MatchString(str)
 }
@@ -108,7 +108,7 @@ func extractPnpmPackageNameAndVersion(dependencyPath string) (string, string) {
 
 func parseNameAtVersion(value string) (name string, version string) {
 	// look for pattern "name@version", where name is allowed to contain zero or more "@"
-	matches := regexp.MustCompile(`^(.+)@([\d.]+)$`).FindStringSubmatch(value)
+	matches := cachedregexp.MustCompile(`^(.+)@([\d.]+)$`).FindStringSubmatch(value)
 
 	if len(matches) != 3 {
 		return name, ""
@@ -142,7 +142,7 @@ func parsePnpmLock(lockfile PnpmLockfile) []PackageDetails {
 		commit := pkg.Resolution.Commit
 
 		if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") {
-			re := regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
+			re := cachedregexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
 			matched := re.FindStringSubmatch(pkg.Resolution.Tarball)
 
 			if matched != nil {
diff --git a/pkg/lockfile/parse-requirements-txt.go b/pkg/lockfile/parse-requirements-txt.go
index 2a753a6133..4f46a0b3e4 100644
--- a/pkg/lockfile/parse-requirements-txt.go
+++ b/pkg/lockfile/parse-requirements-txt.go
@@ -5,8 +5,9 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 const PipEcosystem Ecosystem = "PyPI"
@@ -67,7 +68,7 @@ func parseLine(line string) PackageDetails {
 // than false negatives, and can be dealt with when/if it actually happens.
 func normalizedRequirementName(name string) string {
 	// per https://www.python.org/dev/peps/pep-0503/#normalized-names
-	name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
+	name = cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
 	name = strings.ToLower(name)
 	name = strings.Split(name, "[")[0]
 
@@ -75,7 +76,7 @@ func normalizedRequirementName(name string) string {
 }
 
 func removeComments(line string) string {
-	var re = regexp.MustCompile(`(^|\s+)#.*$`)
+	var re = cachedregexp.MustCompile(`(^|\s+)#.*$`)
 
 	return strings.TrimSpace(re.ReplaceAllString(line, ""))
 }
diff --git a/pkg/lockfile/parse-yarn-lock.go b/pkg/lockfile/parse-yarn-lock.go
index 96a4493a1d..8309c22a5f 100644
--- a/pkg/lockfile/parse-yarn-lock.go
+++ b/pkg/lockfile/parse-yarn-lock.go
@@ -5,8 +5,9 @@ import (
 	"fmt"
 	"net/url"
 	"os"
-	"regexp"
 	"strings"
+
+	"github.com/google/osv-scanner/internal/cachedregexp"
 )
 
 const YarnEcosystem = NpmEcosystem
@@ -63,7 +64,7 @@ func extractYarnPackageName(str string) string {
 }
 
 func determineYarnPackageVersion(group []string) string {
-	re := regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)
+	re := cachedregexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)
 
 	for _, s := range group {
 		matched := re.FindStringSubmatch(s)
@@ -78,7 +79,7 @@ func determineYarnPackageVersion(group []string) string {
 }
 
 func determineYarnPackageResolution(group []string) string {
-	re := regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
+	re := cachedregexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
 
 	for _, s := range group {
 		matched := re.FindStringSubmatch(s)
@@ -111,7 +112,7 @@ func tryExtractCommit(resolution string) string {
 	}
 
 	for _, matcher := range matchers {
-		re := regexp.MustCompile(matcher)
+		re := cachedregexp.MustCompile(matcher)
 		matched := re.FindStringSubmatch(resolution)
 
 		if matched != nil {