From dde5d349b1eef740c285255e6a9e3a8f5c9938e1 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Thu, 11 Apr 2024 11:58:51 -0400 Subject: [PATCH] fix: more robust go main version extraction (#2767) Signed-off-by: Keith Zantow Signed-off-by: Will Murphy Co-authored-by: Will Murphy --- syft/pkg/cataloger/golang/parse_go_binary.go | 32 ++++++++---- .../cataloger/golang/parse_go_binary_test.go | 52 +++++++++++++++++++ .../cataloger/php/parse_pecl_serialized.go | 3 +- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go index eaef4909503..fd8c0543906 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -151,7 +151,10 @@ func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *exten return main } -var semverPattern = regexp.MustCompile(`\x00(?Pv?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`) +// this is checking for (.L)? because at least one binary seems to have \xA0L preceding the version string, but for some reason +// this is unable to be matched by the regex here as \x00\xA0L; +// the only thing that seems to work is to just look for version strings following both \x00 and \x00.L for now +var semverPattern = regexp.MustCompile(`\x00(.L)?(?Pv?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`) func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuildinfoEntry, gbs pkg.KeyValues, reader io.ReadSeekCloser) string { vcsVersion, hasVersion := gbs.Get("vcs.revision") @@ -179,16 +182,8 @@ func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuil if err != nil { log.WithFields("error", err).Trace("unable to seek to start of go binary reader") } else { - contents, err := io.ReadAll(reader) - if err != nil { - log.WithFields("error", err).Trace("unable to read from go binary reader") - } else { - matchMetadata := internal.MatchNamedCaptureGroups(semverPattern, string(contents)) - - version, ok := matchMetadata["version"] - if ok { - return version - } + if v := extractVersionFromContents(reader); v != "" { + return v } } } @@ -210,6 +205,21 @@ func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuil return "" } +func extractVersionFromContents(reader io.Reader) string { + contents, err := io.ReadAll(reader) + if err != nil { + log.WithFields("error", err).Trace("unable to read from go binary reader") + return "" + } + matchMetadata := internal.MatchNamedCaptureGroups(semverPattern, string(contents)) + + version, ok := matchMetadata["version"] + if ok { + return version + } + return "" +} + func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion string) { if ldflags == "" { return "", "" diff --git a/syft/pkg/cataloger/golang/parse_go_binary_test.go b/syft/pkg/cataloger/golang/parse_go_binary_test.go index 583b762a0f4..81faeafb53b 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary_test.go +++ b/syft/pkg/cataloger/golang/parse_go_binary_test.go @@ -2,6 +2,8 @@ package golang import ( "bufio" + "bytes" + "errors" "io" "os" "os/exec" @@ -1090,3 +1092,53 @@ func Test_extractVersionFromLDFlags(t *testing.T) { }) } } + +func Test_extractVersionFromContents(t *testing.T) { + tests := []struct { + name string + contents io.Reader + want string + }{ + { + name: "empty string on error", + contents: &alwaysErrorReader{}, + want: "", + }, + { + name: "empty string on empty reader", + contents: bytes.NewReader([]byte{}), + want: "", + }, + { + name: "null-byte delimited semver", + contents: strings.NewReader("\x001.2.3\x00"), + want: "1.2.3", + }, + { + name: "null-byte delimited semver with v prefix", + contents: strings.NewReader("\x00v1.2.3\x00"), + want: "v1.2.3", + }, + { + // 01a0bfc8: 0e74 5a3b 0000 a04c 7631 2e39 2e35 0000 .tZ;...Lv1.9.5.. from nginx-ingress-controller + // at /nginx-ingress-controller in registry.k8s.io/ingress-nginx/controller:v1.9.5 + // digest: sha256:b3aba22b1da80e7acfc52b115cae1d4c687172cbf2b742d5b502419c25ff340e + // TODO: eventually use something for managing snippets, similar to what's used with binary classifier tests + name: "null byte, then random byte, then L then semver", + contents: strings.NewReader("\x0e\x74\x5a\x3b\x00\x00\xa0\x4cv1.9.5\x00\x00"), + want: "v1.9.5", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractVersionFromContents(tt.contents) + assert.Equal(t, tt.want, got) + }) + } +} + +type alwaysErrorReader struct{} + +func (alwaysErrorReader) Read(_ []byte) (int, error) { + return 0, errors.New("read from always error reader") +} diff --git a/syft/pkg/cataloger/php/parse_pecl_serialized.go b/syft/pkg/cataloger/php/parse_pecl_serialized.go index 9779c7e047e..7f48f209607 100644 --- a/syft/pkg/cataloger/php/parse_pecl_serialized.go +++ b/syft/pkg/cataloger/php/parse_pecl_serialized.go @@ -5,12 +5,13 @@ import ( "fmt" "io" + "github.com/elliotchance/phpserialize" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" - "github.com/elliotchance/phpserialize" ) // parsePeclSerialized is a parser function for PECL metadata contents, returning "Default" php packages discovered.