Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the option to retrieve remote licenses for projects defined in a … #2409

Merged
merged 1 commit into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
haskell.NewHackageCataloger(),
java.NewArchiveCataloger(cfg.JavaConfig()),
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewPomCataloger(cfg.JavaConfig()),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
nix.NewStoreCataloger(),
Expand Down Expand Up @@ -122,7 +122,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
haskell.NewHackageCataloger(),
java.NewArchiveCataloger(cfg.JavaConfig()),
java.NewGradleLockfileCataloger(),
java.NewPomCataloger(),
java.NewPomCataloger(cfg.JavaConfig()),
java.NewNativeImageCataloger(),
javascript.NewLockCataloger(cfg.Javascript),
javascript.NewPackageCataloger(),
Expand Down
98 changes: 0 additions & 98 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,9 @@ package java
import (
"crypto"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path"
"strings"
"time"

"github.com/vifraa/gopom"

intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/licenses"
Expand Down Expand Up @@ -359,98 +353,6 @@ func findPomLicenses(pomProjectObject *parsedPomProject, cfg ArchiveCatalogerCon
}
}

// formatMavenPomURL builds the URL of an artifact's .pom file underneath the
// given Maven repository base URL.
//
// The dotted groupID becomes one path segment per component, followed by the
// artifactID, the version, and finally the "<artifactID>-<version>.pom" file
// name, e.g. <base>/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.pom.
// An error is returned when the base URL cannot be joined with those segments.
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	pomFile := artifactID + "-" + version + ".pom"

	// a group such as "org.apache" maps onto the nested path "org/apache"
	groupParts := strings.Split(groupID, ".")
	segments := make([]string, 0, len(groupParts)+3)
	segments = append(segments, groupParts...)
	segments = append(segments, artifactID, version, pomFile)

	joined, joinErr := url.JoinPath(mavenBaseURL, segments...)
	if joinErr != nil {
		return joined, fmt.Errorf("could not construct maven url: %w", joinErr)
	}
	return joined, nil
}

// recursivelyFindLicensesFromParentPom walks up a chain of parent POMs,
// fetching each one from the repository at cfg.MavenBaseURL, until a POM that
// declares licenses is found, a POM without a parent is reached, or
// cfg.MaxParentRecursiveDepth fetches have been made.
//
// It returns the licenses of the first POM that declares any. When a POM
// cannot be fetched the failure is only trace-logged and an empty slice is
// returned; when the chain ends without licenses the result is nil. Callers
// should therefore test the result with len().
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
	var licenses []string
	// As there can be nested parent poms, we'll keep checking for licenses until we reach the max depth
	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
		if err != nil {
			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
			log.Tracef("unable to get parent pom from Maven central: %v", err)
			return []string{}
		}

		parentLicenses := parseLicensesFromPom(parentPom)
		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
			licenses = parentLicenses
			break
		}

		// The parent coordinates are optional pointers; a POM with an incomplete
		// <parent> element must end the search instead of panicking on a nil dereference.
		parent := parentPom.Parent
		if parent.GroupID == nil || parent.ArtifactID == nil || parent.Version == nil {
			log.Tracef("parent pom of %s:%s:%s has incomplete coordinates, stopping license search", groupID, artifactID, version)
			break
		}

		groupID = *parent.GroupID
		artifactID = *parent.ArtifactID
		version = *parent.Version
	}

	return licenses
}

// getPomFromMavenRepo downloads and decodes the POM for the given coordinates
// from the Maven repository rooted at mavenBaseURL.
//
// The request URL is built with formatMavenPomURL and fetched with a GET
// request that times out after 10 seconds. An error is returned when the URL
// cannot be built, the request fails, the server answers with anything other
// than 200 OK, the body cannot be read, or the body is not a decodable POM.
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
	if err != nil {
		return nil, err
	}
	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)

	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(mavenRequest)
	if err != nil {
		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// Client.Do only reports transport-level failures; a 404 or 5xx still yields a
	// response whose body (typically an HTML error page) must not be parsed as a POM.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
	}

	pom, err := decodePomXML(strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
	}

	return &pom, nil
}

// parseLicensesFromPom collects one string per license entry declared in pom:
// the license name when it is set, otherwise the license URL. Entries that
// have neither are skipped. A nil pom, or a pom without a license list,
// yields a nil slice.
func parseLicensesFromPom(pom *gopom.Project) []string {
	if pom == nil || pom.Licenses == nil {
		return nil
	}

	var found []string
	for _, entry := range *pom.Licenses {
		switch {
		case entry.Name != nil:
			found = append(found, *entry.Name)
		case entry.URL != nil:
			// fall back to the URL only when no name is available
			found = append(found, *entry.URL)
		}
	}
	return found
}

// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new
Expand Down
6 changes: 4 additions & 2 deletions syft/pkg/cataloger/java/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger {

// NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
// Pom files list dependencies that may not be locally installed yet.
func NewPomCataloger() pkg.Cataloger {
func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
gap := newGenericArchiveParserAdapter(cfg)

return generic.NewCataloger("java-pom-cataloger").
WithParserByGlobs(parserPomXML, "**/pom.xml")
WithParserByGlobs(gap.parserPomXML, "**/pom.xml")
}

// NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.
Expand Down
10 changes: 9 additions & 1 deletion syft/pkg/cataloger/java/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,15 @@ func Test_POMCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewPomCataloger())
TestCataloger(t,
NewPomCataloger(
ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
},
))
})
}
}
106 changes: 106 additions & 0 deletions syft/pkg/cataloger/java/maven_repo_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package java

import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"

"github.com/vifraa/gopom"

"github.com/anchore/syft/internal/log"
)

// formatMavenPomURL builds the URL of an artifact's .pom file underneath the
// given Maven repository base URL.
//
// The dotted groupID becomes one path segment per component, followed by the
// artifactID, the version, and finally the "<artifactID>-<version>.pom" file
// name, e.g. <base>/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.pom.
// An error is returned when the base URL cannot be joined with those segments.
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	pomFile := artifactID + "-" + version + ".pom"

	// a group such as "org.apache" maps onto the nested path "org/apache"
	groupParts := strings.Split(groupID, ".")
	segments := make([]string, 0, len(groupParts)+3)
	segments = append(segments, groupParts...)
	segments = append(segments, artifactID, version, pomFile)

	joined, joinErr := url.JoinPath(mavenBaseURL, segments...)
	if joinErr != nil {
		return joined, fmt.Errorf("could not construct maven url: %w", joinErr)
	}
	return joined, nil
}

// recursivelyFindLicensesFromParentPom walks up a chain of parent POMs,
// fetching each one from the repository at cfg.MavenBaseURL, until a POM that
// declares licenses is found, a POM without a parent is reached, or
// cfg.MaxParentRecursiveDepth fetches have been made.
//
// It returns the licenses of the first POM that declares any. When a POM
// cannot be fetched the failure is only trace-logged and an empty slice is
// returned; when the chain ends without licenses the result is nil. Callers
// should therefore test the result with len().
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
	var licenses []string
	// As there can be nested parent poms, we'll keep checking for licenses until we reach the max depth
	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
		if err != nil {
			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
			log.Tracef("unable to get parent pom from Maven central: %v", err)
			return []string{}
		}

		parentLicenses := parseLicensesFromPom(parentPom)
		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
			licenses = parentLicenses
			break
		}

		// The parent coordinates are optional pointers; a POM with an incomplete
		// <parent> element must end the search instead of panicking on a nil dereference.
		parent := parentPom.Parent
		if parent.GroupID == nil || parent.ArtifactID == nil || parent.Version == nil {
			log.Tracef("parent pom of %s:%s:%s has incomplete coordinates, stopping license search", groupID, artifactID, version)
			break
		}

		groupID = *parent.GroupID
		artifactID = *parent.ArtifactID
		version = *parent.Version
	}

	return licenses
}

// getPomFromMavenRepo downloads and decodes the POM for the given coordinates
// from the Maven repository rooted at mavenBaseURL.
//
// The request URL is built with formatMavenPomURL and fetched with a GET
// request that times out after 10 seconds. An error is returned when the URL
// cannot be built, the request fails, the server answers with anything other
// than 200 OK, the body cannot be read, or the body is not a decodable POM.
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
	if err != nil {
		return nil, err
	}
	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)

	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(mavenRequest)
	if err != nil {
		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// Client.Do only reports transport-level failures; a 404 or 5xx still yields a
	// response whose body (typically an HTML error page) must not be parsed as a POM.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
	}

	pom, err := decodePomXML(strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
	}

	return &pom, nil
}

// parseLicensesFromPom collects one string per license entry declared in pom:
// the license name when it is set, otherwise the license URL. Entries that
// have neither are skipped. A nil pom, or a pom without a license list,
// yields a nil slice.
func parseLicensesFromPom(pom *gopom.Project) []string {
	if pom == nil || pom.Licenses == nil {
		return nil
	}

	var found []string
	for _, entry := range *pom.Licenses {
		switch {
		case entry.Name != nil:
			found = append(found, *entry.Name)
		case entry.URL != nil:
			// fall back to the URL only when no name is available
			found = append(found, *entry.URL)
		}
	}
	return found
}
21 changes: 19 additions & 2 deletions syft/pkg/cataloger/java/parse_pom_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const pomXMLGlob = "*pom.xml"

var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")

func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (gap genericArchiveParserAdapter) parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pom, err := decodePomXML(reader)
if err != nil {
return nil, nil, err
Expand All @@ -36,6 +36,7 @@ func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationR
p := newPackageFromPom(
pom,
dep,
gap.cfg,
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
if p.Name == "" {
Expand Down Expand Up @@ -97,7 +98,7 @@ func newPomProject(path string, p gopom.Project, location file.Location) *parsed
}
}

func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package {
m := pkg.JavaArchive{
PomProperties: &pkg.JavaPomProperties{
GroupID: resolveProperty(pom, dep.GroupID, "groupId"),
Expand All @@ -109,10 +110,26 @@ func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...fil
name := safeString(dep.ArtifactID)
version := resolveProperty(pom, dep.Version, "version")

licenses := make([]pkg.License, 0)
if version != "" && cfg.UseNetwork {
parentLicenses := recursivelyFindLicensesFromParentPom(
m.PomProperties.GroupID,
m.PomProperties.ArtifactID,
version,
cfg)

if len(parentLicenses) > 0 {
for _, licenseName := range parentLicenses {
licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
}
}
}

p := pkg.Package{
Name: name,
Version: version,
Locations: file.NewLocationSet(locations...),
Licenses: pkg.NewLicenseSet(licenses...),
PURL: packageURL(name, version, m),
Language: pkg.Java,
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
Expand Down
20 changes: 18 additions & 2 deletions syft/pkg/cataloger/java/parse_pom_xml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/vifraa/gopom"

"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
Expand Down Expand Up @@ -61,7 +62,15 @@ func Test_parserPomXML(t *testing.T) {
for i := range test.expected {
test.expected[i].Locations.Add(file.NewLocation(test.input))
}
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)

gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
})

pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
})
}
}
Expand Down Expand Up @@ -276,7 +285,14 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
for i := range test.expected {
test.expected[i].Locations.Add(file.NewLocation(test.input))
}
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)

gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
})
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
})
}
}
Expand Down
Loading