// hasFullWord reports whether targetPhrase appears in candidate as one or more
// complete dash-delimited words. An occurrence counts only when it begins at
// the start of candidate or right after a '-', and ends at the end of
// candidate or right before a '-'.
//
// The empty string and the literal "cataloger" are never treated as matches.
func hasFullWord(targetPhrase, candidate string) bool {
	if targetPhrase == "cataloger" || targetPhrase == "" {
		return false
	}

	// Check every occurrence rather than only the first one: an early hit that
	// is embedded in a longer word (the "go" in "cargo") must not hide a later
	// occurrence that is a proper full word.
	for offset := 0; offset < len(candidate); {
		idx := strings.Index(candidate[offset:], targetPhrase)
		if idx == -1 {
			return false
		}

		start := offset + idx
		end := start + len(targetPhrase)

		startsOnBoundary := start == 0 || candidate[start-1] == '-'
		endsOnBoundary := end == len(candidate) || candidate[end] == '-'
		if startsOnBoundary && endsOnBoundary {
			return true
		}

		// Resume one byte past this hit so overlapping occurrences are seen too.
		offset = start + 1
	}
	return false
}
"cargo-auditable-binary-cataloger", + "sbom-cataloger", + "cocoapods-cataloger", + } tests := []struct { name string patterns []string @@ -144,6 +176,43 @@ func Test_filterCatalogers(t *testing.T) { "go-module-binary-cataloger", }, }, + { + name: "don't cross match ecosystems with matching prefix", + patterns: []string{ + "java-cataloger", + }, + catalogers: []string{ + "javascript-package-cataloger", + "java-cataloger", + }, + want: []string{ + "java-cataloger", + }, + }, + { + name: "don't cross match ecosystems with short, common name", + patterns: []string{ + "go", + }, + catalogers: largeCatalogerList, + want: []string{ + "go-mod-file-cataloger", + "go-module-binary-cataloger", + //"rust-cargo-lock-cataloger", // with naive "contains" matching + //"cargo-auditable-binary-cataloger", // with naive "contains" matching + }, + }, + { + name: "ignore partial matches", + patterns: []string{ + "mod", + }, + catalogers: largeCatalogerList, + want: []string{ + "go-mod-file-cataloger", + //"go-module-binary-cataloger", // unfortunately not a full word (this should probably be renamed) + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -162,8 +231,6 @@ func Test_filterCatalogers(t *testing.T) { } func Test_contains(t *testing.T) { - type args struct { - } tests := []struct { name string enabledCatalogers []string @@ -201,3 +268,73 @@ func Test_contains(t *testing.T) { }) } } + +func Test_hasFullWord(t *testing.T) { + + tests := []struct { + name string + targetPhrase string + candidate string + want bool + }{ + { + name: "exact match", + targetPhrase: "php-composer-installed-cataloger", + candidate: "php-composer-installed-cataloger", + want: true, + }, + { + name: "partial, full word match", + targetPhrase: "composer", + candidate: "php-composer-installed-cataloger", + want: true, + }, + { + name: "partial, full, multi-word match", + targetPhrase: "php-composer", + candidate: "php-composer-installed-cataloger", + want: true, + }, + { + name: 
"prefix match", + targetPhrase: "php", + candidate: "php-composer-installed-cataloger", + want: true, + }, + { + name: "postfix match with -cataloger suffix", + targetPhrase: "installed", + candidate: "php-composer-installed-cataloger", + want: true, + }, + { + name: "postfix match", + targetPhrase: "installed", + candidate: "php-composer-installed", + want: true, + }, + { + name: "ignore cataloger keyword", + targetPhrase: "cataloger", + candidate: "php-composer-installed-cataloger", + want: false, + }, + { + name: "ignore partial match", + targetPhrase: "hp", + candidate: "php-composer-installed-cataloger", + want: false, + }, + { + name: "ignore empty string", + targetPhrase: "", + candidate: "php-composer-installed-cataloger", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, hasFullWord(tt.targetPhrase, tt.candidate), "hasFullWord(%v, %v)", tt.targetPhrase, tt.candidate) + }) + } +}