diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go
index b9c4867cda5..ca3d4086176 100644
--- a/syft/pkg/cataloger/cataloger.go
+++ b/syft/pkg/cataloger/cataloger.go
@@ -83,7 +83,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
 		haskell.NewHackageCataloger(),
 		java.NewArchiveCataloger(cfg.JavaConfig()),
 		java.NewGradleLockfileCataloger(),
-		java.NewPomCataloger(),
+		java.NewPomCataloger(cfg.JavaConfig()),
 		java.NewNativeImageCataloger(),
 		javascript.NewLockCataloger(cfg.Javascript),
 		nix.NewStoreCataloger(),
@@ -122,7 +122,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
 		haskell.NewHackageCataloger(),
 		java.NewArchiveCataloger(cfg.JavaConfig()),
 		java.NewGradleLockfileCataloger(),
-		java.NewPomCataloger(),
+		java.NewPomCataloger(cfg.JavaConfig()),
 		java.NewNativeImageCataloger(),
 		javascript.NewLockCataloger(cfg.Javascript),
 		javascript.NewPackageCataloger(),
diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go
index 1dee214efee..c423be7694e 100644
--- a/syft/pkg/cataloger/java/archive_parser.go
+++ b/syft/pkg/cataloger/java/archive_parser.go
@@ -3,15 +3,9 @@ package java
 import (
 	"crypto"
 	"fmt"
-	"io"
-	"net/http"
-	"net/url"
 	"os"
 	"path"
 	"strings"
-	"time"
-
-	"github.com/vifraa/gopom"
 
 	intFile "github.com/anchore/syft/internal/file"
 	"github.com/anchore/syft/internal/licenses"
@@ -359,98 +353,6 @@ func findPomLicenses(pomProjectObject *parsedPomProject, cfg ArchiveCatalogerCon
 	}
 }
 
-func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
-	// groupID needs to go from maven.org -> maven/org
-	urlPath := strings.Split(groupID, ".")
-	artifactPom := fmt.Sprintf("%s-%s.pom", artifactID, version)
-	urlPath = append(urlPath, artifactID, version, artifactPom)
-
-	// ex:"https://repo1.maven.org/maven2/groupID/artifactID/artifactPom
-	requestURL, err = url.JoinPath(mavenBaseURL, urlPath...)
-	if err != nil {
-		return requestURL, fmt.Errorf("could not construct maven url: %w", err)
-	}
-	return requestURL, err
-}
-
-func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
-	var licenses []string
-	// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
-	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
-		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
-		if err != nil {
-			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
-			log.Tracef("unable to get parent pom from Maven central: %v", err)
-			return []string{}
-		}
-		parentLicenses := parseLicensesFromPom(parentPom)
-		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
-			licenses = parentLicenses
-			break
-		}
-
-		groupID = *parentPom.Parent.GroupID
-		artifactID = *parentPom.Parent.ArtifactID
-		version = *parentPom.Parent.Version
-	}
-
-	return licenses
-}
-
-func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
-	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
-	if err != nil {
-		return nil, err
-	}
-	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
-
-	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
-	if err != nil {
-		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
-	}
-
-	httpClient := &http.Client{
-		Timeout: time.Second * 10,
-	}
-
-	resp, err := httpClient.Do(mavenRequest)
-	if err != nil {
-		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
-	}
-	defer func() {
-		if err := resp.Body.Close(); err != nil {
-			log.Errorf("unable to close body: %+v", err)
-		}
-	}()
-
-	bytes, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
-	}
-
-	pom, err := decodePomXML(strings.NewReader(string(bytes)))
-	if err != nil {
-		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
-	}
-
-	return &pom, nil
-}
-
-func parseLicensesFromPom(pom *gopom.Project) []string {
-	var licenses []string
-	if pom != nil && pom.Licenses != nil {
-		for _, license := range *pom.Licenses {
-			if license.Name != nil {
-				licenses = append(licenses, *license.Name)
-			} else if license.URL != nil {
-				licenses = append(licenses, *license.URL)
-			}
-		}
-	}
-
-	return licenses
-}
-
 // discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
 // parent package, returning all listed Java packages found for each pom
 // properties discovered and potentially updating the given parentPkg with new
diff --git a/syft/pkg/cataloger/java/cataloger.go b/syft/pkg/cataloger/java/cataloger.go
index 6764109b559..fa8560ec45e 100644
--- a/syft/pkg/cataloger/java/cataloger.go
+++ b/syft/pkg/cataloger/java/cataloger.go
@@ -31,9 +31,11 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger {
 
 // NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
 // Pom files list dependencies that maybe not be locally installed yet.
-func NewPomCataloger() pkg.Cataloger {
+func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
+	gap := newGenericArchiveParserAdapter(cfg)
+
 	return generic.NewCataloger("java-pom-cataloger").
-		WithParserByGlobs(parserPomXML, "**/pom.xml")
+		WithParserByGlobs(gap.parserPomXML, "**/pom.xml")
 }
 
 // NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.
diff --git a/syft/pkg/cataloger/java/cataloger_test.go b/syft/pkg/cataloger/java/cataloger_test.go
index c339e1e7a0f..da524fc5ba3 100644
--- a/syft/pkg/cataloger/java/cataloger_test.go
+++ b/syft/pkg/cataloger/java/cataloger_test.go
@@ -89,7 +89,15 @@ func Test_POMCataloger_Globs(t *testing.T) {
 			pkgtest.NewCatalogTester().
 				FromDirectory(t, test.fixture).
 				ExpectsResolverContentQueries(test.expected).
-				TestCataloger(t, NewPomCataloger())
+				TestCataloger(t,
+					NewPomCataloger(
+						ArchiveCatalogerConfig{
+							ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
+								IncludeIndexedArchives:   true,
+								IncludeUnindexedArchives: true,
+							},
+						},
+					))
 		})
 	}
 }
diff --git a/syft/pkg/cataloger/java/maven_repo_utils.go b/syft/pkg/cataloger/java/maven_repo_utils.go
new file mode 100644
index 00000000000..603c63e8320
--- /dev/null
+++ b/syft/pkg/cataloger/java/maven_repo_utils.go
@@ -0,0 +1,126 @@
+package java
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/vifraa/gopom"
+
+	"github.com/anchore/syft/internal/log"
+)
+
+// formatMavenPomURL builds the URL of the pom for the given Maven coordinates beneath mavenBaseURL.
+// Each dot-separated segment of groupID becomes one path element.
+func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
+	// groupID needs to go from maven.org -> maven/org
+	urlPath := strings.Split(groupID, ".")
+	artifactPom := fmt.Sprintf("%s-%s.pom", artifactID, version)
+	urlPath = append(urlPath, artifactID, version, artifactPom)
+
+	// ex:"https://repo1.maven.org/maven2/groupID/artifactID/artifactPom
+	requestURL, err = url.JoinPath(mavenBaseURL, urlPath...)
+	if err != nil {
+		return requestURL, fmt.Errorf("could not construct maven url: %w", err)
+	}
+	return requestURL, nil
+}
+
+// recursivelyFindLicensesFromParentPom fetches the pom for the given coordinates from cfg.MavenBaseURL and
+// returns the licenses it declares. When that pom declares none, the chain of <parent> poms is followed,
+// for at most cfg.MaxParentRecursiveDepth fetches in total. A failed fetch yields an empty result.
+func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
+	var licenses []string
+	// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
+	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
+		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
+		if err != nil {
+			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
+			log.Tracef("unable to get parent pom from Maven central: %v", err)
+			return []string{}
+		}
+		parentLicenses := parseLicensesFromPom(parentPom)
+		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
+			licenses = parentLicenses
+			break
+		}
+
+		// the parent coordinates are optional pointers; without all three there is nothing left to fetch
+		parent := parentPom.Parent
+		if parent.GroupID == nil || parent.ArtifactID == nil || parent.Version == nil {
+			break
+		}
+
+		groupID = *parent.GroupID
+		artifactID = *parent.ArtifactID
+		version = *parent.Version
+	}
+
+	return licenses
+}
+
+// getPomFromMavenRepo downloads and decodes the pom for the given coordinates from the repository at
+// mavenBaseURL. The request is bounded by a 10 second timeout and any non-200 response is an error.
+func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
+	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
+	if err != nil {
+		return nil, err
+	}
+	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
+
+	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
+	}
+
+	httpClient := &http.Client{
+		Timeout: time.Second * 10,
+	}
+
+	resp, err := httpClient.Do(mavenRequest)
+	if err != nil {
+		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
+	}
+	defer func() {
+		if err := resp.Body.Close(); err != nil {
+			log.Errorf("unable to close body: %+v", err)
+		}
+	}()
+
+	// the body of a non-200 response is not the requested pom and must not be handed to the XML decoder
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
+	}
+
+	content, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
+	}
+
+	pom, err := decodePomXML(strings.NewReader(string(content)))
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
+	}
+
+	return &pom, nil
+}
+
+// parseLicensesFromPom returns one entry per license listed in pom: the license name when present,
+// otherwise its URL. Licenses with neither are skipped, and a nil pom yields nil.
+func parseLicensesFromPom(pom *gopom.Project) []string {
+	var licenses []string
+	if pom != nil && pom.Licenses != nil {
+		for _, license := range *pom.Licenses {
+			if license.Name != nil {
+				licenses = append(licenses, *license.Name)
+			} else if license.URL != nil {
+				licenses = append(licenses, *license.URL)
+			}
+		}
+	}
+
+	return licenses
+}
diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go
index 75376521fc9..7d6ce92b9e6 100644
--- a/syft/pkg/cataloger/java/parse_pom_xml.go
+++ b/syft/pkg/cataloger/java/parse_pom_xml.go
@@ -24,7 +24,7 @@ const pomXMLGlob = "*pom.xml"
 
 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
 
-func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
+func (gap genericArchiveParserAdapter) parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
 	pom, err := decodePomXML(reader)
 	if err != nil {
 		return nil, nil, err
@@ -36,6 +36,7 @@ func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationR
 		p := newPackageFromPom(
 			pom,
 			dep,
+			gap.cfg,
 			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
 		)
 		if p.Name == "" {
@@ -97,7 +98,7 @@ func newPomProject(path string, p gopom.Project, location file.Location) *parsed
 	}
 }
 
-func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
+func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package {
 	m := pkg.JavaArchive{
 		PomProperties: &pkg.JavaPomProperties{
 			GroupID:    resolveProperty(pom, dep.GroupID, "groupId"),
@@ -109,10 +110,26 @@ func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...fil
 	name := safeString(dep.ArtifactID)
 	version := resolveProperty(pom, dep.Version, "version")
 
+	licenses := make([]pkg.License, 0)
+	if version != "" && cfg.UseNetwork {
+		parentLicenses := recursivelyFindLicensesFromParentPom(
+			m.PomProperties.GroupID,
+			m.PomProperties.ArtifactID,
+			version,
+			cfg)
+
+		if len(parentLicenses) > 0 {
+			for _, licenseName := range parentLicenses {
+				licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
+			}
+		}
+	}
+
 	p := pkg.Package{
 		Name:      name,
 		Version:   version,
 		Locations: file.NewLocationSet(locations...),
+		Licenses:  pkg.NewLicenseSet(licenses...),
 		PURL:      packageURL(name, version, m),
 		Language:  pkg.Java,
 		Type:      pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go
index b794d28e72e..c845233b125 100644
--- a/syft/pkg/cataloger/java/parse_pom_xml_test.go
+++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go
@@ -11,6 +11,7 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/vifraa/gopom"
 
+	"github.com/anchore/syft/syft/cataloging"
 	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/license"
 	"github.com/anchore/syft/syft/pkg"
@@ -61,7 +62,15 @@ func Test_parserPomXML(t *testing.T) {
 			for i := range test.expected {
 				test.expected[i].Locations.Add(file.NewLocation(test.input))
 			}
-			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
+
+			gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
+				ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
+					IncludeIndexedArchives:   true,
+					IncludeUnindexedArchives: true,
+				},
+			})
+
+			pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
 		})
 	}
 }
@@ -276,7 +285,14 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
 			for i := range test.expected {
 				test.expected[i].Locations.Add(file.NewLocation(test.input))
 			}
-			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
+
+			gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
+				ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
+					IncludeIndexedArchives:   true,
+					IncludeUnindexedArchives: true,
+				},
+			})
+			pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
 		})
 	}
 }