Skip to content

Commit

Permalink
fix cataloger selection to be more specific
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
  • Loading branch information
wagoodman committed Feb 17, 2023
1 parent 2642a36 commit 4047734
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 3 deletions.
22 changes: 21 additions & 1 deletion syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,29 @@ func contains(enabledPartial []string, catalogerName string) bool {
if partial == "" {
continue
}
if strings.Contains(catalogerName, partial) {
if hasFullWord(partial, catalogerName) {
return true
}
}
return false
}

// hasFullWord reports whether targetPhrase appears in candidate as one or more
// complete dash-delimited words. An occurrence counts only when it begins at
// the start of candidate or directly after a '-', and finishes at the end of
// candidate or directly before a '-'.
//
// The empty string and the literal "cataloger" never match.
//
// Every occurrence of targetPhrase is examined, not just the first one, so a
// phrase that first shows up embedded in a longer word (e.g. "go" inside
// "cargo") is still found when it later appears as a word of its own.
func hasFullWord(targetPhrase, candidate string) bool {
	if targetPhrase == "cataloger" || targetPhrase == "" {
		return false
	}

	for offset := 0; offset < len(candidate); {
		idx := strings.Index(candidate[offset:], targetPhrase)
		if idx == -1 {
			return false
		}

		start := offset + idx
		end := start + len(targetPhrase)

		startsWord := start == 0 || candidate[start-1] == '-'
		endsWord := end == len(candidate) || candidate[end] == '-'
		if startsWord && endsWord {
			return true
		}

		// advance by a single byte so overlapping occurrences are not skipped
		offset = start + 1
	}
	return false
}
141 changes: 139 additions & 2 deletions syft/pkg/cataloger/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,38 @@ func (d dummy) Catalog(_ source.FileResolver) ([]pkg.Package, []artifact.Relatio
}

func Test_filterCatalogers(t *testing.T) {
largeCatalogerList := []string{
"alpmdb-cataloger",
"apkdb-cataloger",
"binary-cataloger",
"conan-cataloger",
"dartlang-lock-cataloger",
"dpkgdb-cataloger",
"dotnet-deps-cataloger",
"elixir-mix-lock-cataloger",
"erlang-rebar-lock-cataloger",
"go-mod-file-cataloger",
"go-module-binary-cataloger",
"haskell-cataloger",
"graalvm-native-image-cataloger",
"java-cataloger",
"java-pom-cataloger",
"javascript-package-cataloger",
"javascript-lock-cataloger",
"php-composer-installed-cataloger",
"php-composer-lock-cataloger",
"portage-cataloger",
"python-index-cataloger",
"python-package-cataloger",
"rpm-db-cataloger",
"rpm-file-cataloger",
"ruby-gemfile-cataloger",
"ruby-gemspec-cataloger",
"rust-cargo-lock-cataloger",
"cargo-auditable-binary-cataloger",
"sbom-cataloger",
"cocoapods-cataloger",
}
tests := []struct {
name string
patterns []string
Expand Down Expand Up @@ -144,6 +176,43 @@ func Test_filterCatalogers(t *testing.T) {
"go-module-binary-cataloger",
},
},
{
name: "don't cross match ecosystems with matching prefix",
patterns: []string{
"java-cataloger",
},
catalogers: []string{
"javascript-package-cataloger",
"java-cataloger",
},
want: []string{
"java-cataloger",
},
},
{
name: "don't cross match ecosystems with short, common name",
patterns: []string{
"go",
},
catalogers: largeCatalogerList,
want: []string{
"go-mod-file-cataloger",
"go-module-binary-cataloger",
//"rust-cargo-lock-cataloger", // with naive "contains" matching
//"cargo-auditable-binary-cataloger", // with naive "contains" matching
},
},
{
name: "ignore partial matches",
patterns: []string{
"mod",
},
catalogers: largeCatalogerList,
want: []string{
"go-mod-file-cataloger",
//"go-module-binary-cataloger", // unfortunately not a full word (this should probably be renamed)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -162,8 +231,6 @@ func Test_filterCatalogers(t *testing.T) {
}

func Test_contains(t *testing.T) {
type args struct {
}
tests := []struct {
name string
enabledCatalogers []string
Expand Down Expand Up @@ -201,3 +268,73 @@ func Test_contains(t *testing.T) {
})
}
}

func Test_hasFullWord(t *testing.T) {

tests := []struct {
name string
targetPhrase string
candidate string
want bool
}{
{
name: "exact match",
targetPhrase: "php-composer-installed-cataloger",
candidate: "php-composer-installed-cataloger",
want: true,
},
{
name: "partial, full word match",
targetPhrase: "composer",
candidate: "php-composer-installed-cataloger",
want: true,
},
{
name: "partial, full, multi-word match",
targetPhrase: "php-composer",
candidate: "php-composer-installed-cataloger",
want: true,
},
{
name: "prefix match",
targetPhrase: "php",
candidate: "php-composer-installed-cataloger",
want: true,
},
{
name: "postfix match with -cataloger suffix",
targetPhrase: "installed",
candidate: "php-composer-installed-cataloger",
want: true,
},
{
name: "postfix match",
targetPhrase: "installed",
candidate: "php-composer-installed",
want: true,
},
{
name: "ignore cataloger keyword",
targetPhrase: "cataloger",
candidate: "php-composer-installed-cataloger",
want: false,
},
{
name: "ignore partial match",
targetPhrase: "hp",
candidate: "php-composer-installed-cataloger",
want: false,
},
{
name: "ignore empty string",
targetPhrase: "",
candidate: "php-composer-installed-cataloger",
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, hasFullWord(tt.targetPhrase, tt.candidate), "hasFullWord(%v, %v)", tt.targetPhrase, tt.candidate)
})
}
}

0 comments on commit 4047734

Please sign in to comment.