From 9627cb88c1a2753bcd283da86b91a001acafc648 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Sun, 21 Jul 2024 15:28:36 -0400 Subject: [PATCH 1/5] fix: spdx output performance with many relationships Signed-off-by: Keith Zantow --- .../package_binary_elf_relationships_test.go | 4 +- .../relationships/indexed_relationships.go | 123 ++++++++++++++++++ .../indexed_relationships_test.go | 74 +++++++++++ .../common/spdxhelpers/to_format_model.go | 20 +-- syft/sbom/sbom.go | 2 + 5 files changed, 213 insertions(+), 10 deletions(-) create mode 100644 internal/relationships/indexed_relationships.go create mode 100644 internal/relationships/indexed_relationships_test.go diff --git a/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go b/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go index 354c7fba8ef..a088a6b4db9 100644 --- a/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go +++ b/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/require" + "github.com/anchore/syft/internal/relationships" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/source" ) @@ -44,12 +45,13 @@ func TestBinaryElfRelationships(t *testing.T) { } } + relationshipIndex := relationships.NewIndex(sbom.Relationships) for name, expectedDepNames := range expectedGraph { pkgId := nameToId[name] p := sbom.Artifacts.Packages.Package(pkgId) require.NotNil(t, p, "expected package %q to be present in the SBOM", name) - rels := sbom.RelationshipsForPackage(*p, artifact.DependencyOfRelationship) + rels := relationshipIndex.ToAndFrom(*p, artifact.DependencyOfRelationship) require.NotEmpty(t, rels, "expected package %q to have relationships", name) toIds := map[artifact.ID]struct{}{} diff --git a/internal/relationships/indexed_relationships.go b/internal/relationships/indexed_relationships.go new file mode 100644 index 00000000000..cffb90cad8b --- /dev/null +++ b/internal/relationships/indexed_relationships.go @@ -0,0 +1,123 @@ +package relationships + +import ( + "slices" + "strings" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" +) + +type Index struct { + byFromID map[artifact.ID][]artifact.Relationship + byToID map[artifact.ID][]artifact.Relationship +} + +// NewIndex returns a new relationship Index +func NewIndex(relationships []artifact.Relationship) *Index { + out := &Index{ + byFromID: map[artifact.ID][]artifact.Relationship{}, + byToID: map[artifact.ID][]artifact.Relationship{}, + } + + fromIDs := map[artifact.ID][]sortableRelationship{} + toIDs := map[artifact.ID][]sortableRelationship{} + + // store appropriate indexes for stable ordering to minimize ID() calls + for _, r := range relationships { + fromID := r.From.ID() + toID := r.To.ID() + + fromIDs[fromID] = append(fromIDs[fromID], sortableRelationship{string(toID), r}) + out.byToID[toID] = append(out.byToID[toID], r) + } + + for k, v := range fromIDs { + out.byFromID[k] = collect(v) + } + + for k, v := range toIDs { + out.byToID[k] = collect(v) + } + + return out +} + +// From returns all relationships from the given identifiable type +func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + if identifiable == nil { + return nil + } + var out []artifact.Relationship + for _, r := range i.byFromID[identifiable.ID()] { + if len(types) == 0 || slices.Contains(types, r.Type) { + out = append(out, r) + } + } + return out +} + +// To returns all relationships to the given identifiable type +func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + if identifiable == nil { + return nil + } + var out []artifact.Relationship + for _, r := range i.byToID[identifiable.ID()] { + if len(types) == 0 || slices.Contains(types, r.Type) { + out = append(out, r) + } + } + return out +} + +// ToAndFrom returns all relationships to or from the given identifiable type +func (i *Index) ToAndFrom(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + return append(i.To(identifiable, types...), i.From(identifiable, types...)...) +} + +// Coordinates returns all coordinates for the provided identifiable for provided relationship types +// If no types are provided, all relationship types are considered. +func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates { + var coordinates []file.Coordinates + for _, relationship := range i.ToAndFrom(identifiable, types...) { + cords := extractCoordinates(relationship) + coordinates = append(coordinates, cords...) + } + return coordinates +} + +func extractCoordinates(relationship artifact.Relationship) (results []file.Coordinates) { + if coordinates, exists := relationship.From.(file.Coordinates); exists { + results = append(results, coordinates) + } + + if coordinates, exists := relationship.To.(file.Coordinates); exists { + results = append(results, coordinates) + } + + return results +} + +type sortableRelationship struct { + key string + rel artifact.Relationship +} + +func collect(relationships []sortableRelationship) []artifact.Relationship { + slices.SortFunc(relationships, sortFunc) + + var out []artifact.Relationship + for _, rel := range relationships { + out = append(out, rel.rel) + } + return out +} + +func sortFunc(a, b sortableRelationship) int { + cmp := strings.Compare(string(a.rel.Type), string(b.rel.Type)) + if cmp != 0 { + return cmp + } + return strings.Compare(a.key, b.key) +} diff --git a/internal/relationships/indexed_relationships_test.go b/internal/relationships/indexed_relationships_test.go new file mode 100644 index 00000000000..a82b381c59c --- /dev/null +++ b/internal/relationships/indexed_relationships_test.go @@ -0,0 +1,74 @@ +package relationships + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" +) + +func Test_Index(t *testing.T) { + p1 := pkg.Package{ + Name: "pkg-1", + } + p2 := pkg.Package{ + Name: "pkg-2", + } + p3 := pkg.Package{ + Name: "pkg-3", + } + c1 := file.Coordinates{ + RealPath: "/coords/1", + } + c2 := file.Coordinates{ + RealPath: "/coords/2", + } + + for _, p := range []*pkg.Package{&p1, &p2, &p3} { + p.SetID() + } + + r1 := artifact.Relationship{ + From: p1, + To: p2, + Type: artifact.DependencyOfRelationship, + } + r2 := artifact.Relationship{ + From: p1, + To: p3, + Type: artifact.DependencyOfRelationship, + } + r3 := artifact.Relationship{ + From: p1, + To: c1, + Type: artifact.ContainsRelationship, + } + r4 := artifact.Relationship{ + From: p2, + To: c2, + Type: artifact.ContainsRelationship, + } + r5 := artifact.Relationship{ + From: p3, + To: c2, + Type: artifact.ContainsRelationship, + } + + idx := NewIndex([]artifact.Relationship{r1, r2, r3, r4, r5}) + + require.EqualValues(t, idx.ToAndFrom(p2), rels(r1, r4)) + require.EqualValues(t, idx.ToAndFrom(p2, artifact.ContainsRelationship), rels(r4)) + + require.EqualValues(t, idx.To(p2), rels(r1)) + require.EqualValues(t, idx.To(p2, artifact.ContainsRelationship), rels()) + + require.EqualValues(t, idx.From(p2), rels(r4)) + require.EqualValues(t, idx.From(p2, artifact.ContainsRelationship), rels(r4)) +} + +func rels(values ...artifact.Relationship) []artifact.Relationship { + return values +} diff --git a/syft/format/common/spdxhelpers/to_format_model.go b/syft/format/common/spdxhelpers/to_format_model.go index 767ffdcbe62..83ef123f438 100644 --- a/syft/format/common/spdxhelpers/to_format_model.go +++ b/syft/format/common/spdxhelpers/to_format_model.go @@ -16,6 +16,7 @@ import ( "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/mimetype" + "github.com/anchore/syft/internal/relationships" "github.com/anchore/syft/internal/spdxlicense" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -45,9 +46,10 @@ const ( func ToFormatModel(s sbom.SBOM) *spdx.Document { name, namespace := helpers.DocumentNameAndNamespace(s.Source, s.Descriptor) - packages := toPackages(s.Artifacts.Packages, s) + rels := relationships.NewIndex(s.Relationships) + packages := toPackages(rels, s.Artifacts.Packages, s) - relationships := toRelationships(s.RelationshipsSorted()) + allRelationships := toRelationships(s.RelationshipsSorted()) // for valid SPDX we need a document describes relationship describesID := spdx.ElementID("DOCUMENT") @@ -57,7 +59,7 @@ func ToFormatModel(s sbom.SBOM) *spdx.Document { describesID = rootPackage.PackageSPDXIdentifier // add all relationships from the document root to all other packages - relationships = append(relationships, toRootRelationships(rootPackage, packages)...) + allRelationships = append(allRelationships, toRootRelationships(rootPackage, packages)...) // append the root package packages = append(packages, rootPackage) @@ -75,7 +77,7 @@ func ToFormatModel(s sbom.SBOM) *spdx.Document { } // add the root document relationship - relationships = append(relationships, documentDescribesRelationship) + allRelationships = append(allRelationships, documentDescribesRelationship) return &spdx.Document{ // 6.1: SPDX Version; should be in the format "SPDX-x.x" @@ -150,7 +152,7 @@ func ToFormatModel(s sbom.SBOM) *spdx.Document { }, Packages: packages, Files: toFiles(s), - Relationships: relationships, + Relationships: allRelationships, OtherLicenses: toOtherLicenses(s.Artifacts.Packages), } } @@ -302,7 +304,7 @@ func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID { // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/) // //nolint:funlen -func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) { +func toPackages(rels *relationships.Index, catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) { for _, p := range catalog.Sorted() { // name should be guaranteed to be unique, but semantically useful and stable id := toSPDXID(p) @@ -318,7 +320,7 @@ func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Packag // 2. syft has generated a sha1 digest for the package's contents packageChecksums, filesAnalyzed := toPackageChecksums(p) - packageVerificationCode := newPackageVerificationCode(p, sbom) + packageVerificationCode := newPackageVerificationCode(rels, p, sbom) if packageVerificationCode != nil { filesAnalyzed = true } @@ -744,12 +746,12 @@ func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense { // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */ // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field // the above link contains the SPDX algorithm for a package verification code -func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode { +func newPackageVerificationCode(rels *relationships.Index, p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode { // key off of the contains relationship; // spdx validator will fail if a package claims to contain a file but no sha1 provided // if a sha1 for a file is provided then the validator will fail if the package does not have // a package verification code - coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship) + coordinates := rels.Coordinates(p, artifact.ContainsRelationship) var digests []file.Digest for _, c := range coordinates { digest := sbom.Artifacts.FileDigests[c] diff --git a/syft/sbom/sbom.go b/syft/sbom/sbom.go index b3f3cd74ff9..7c29457343e 100644 --- a/syft/sbom/sbom.go +++ b/syft/sbom/sbom.go @@ -70,6 +70,7 @@ func (s SBOM) AllCoordinates() []file.Coordinates { // RelationshipsForPackage returns all relationships for the provided types. // If no types are provided, all relationships for the package are returned. +// Deprecated -- use relationships.Index func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.RelationshipType) []artifact.Relationship { if len(rt) == 0 { rt = artifact.AllRelationshipTypes() @@ -103,6 +104,7 @@ func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.Relationship // CoordinatesForPackage returns all coordinates for the provided package for provided relationship types // If no types are provided, all relationship types are considered. +// Deprecated -- use relationships.Index func (s SBOM) CoordinatesForPackage(p pkg.Package, rt ...artifact.RelationshipType) []file.Coordinates { var coordinates []file.Coordinates for _, relationship := range s.RelationshipsForPackage(p, rt...) { From 53c7931518be29922f427e224e5cce5534c00ad4 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Sun, 21 Jul 2024 15:44:08 -0400 Subject: [PATCH 2/5] chore: update tests Signed-off-by: Keith Zantow --- syft/format/common/spdxhelpers/to_format_model_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/syft/format/common/spdxhelpers/to_format_model_test.go b/syft/format/common/spdxhelpers/to_format_model_test.go index c2f181f0d89..d5d19d416b1 100644 --- a/syft/format/common/spdxhelpers/to_format_model_test.go +++ b/syft/format/common/spdxhelpers/to_format_model_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/anchore/syft/internal/relationships" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/format/internal/spdxutil/helpers" @@ -665,7 +666,7 @@ func Test_H1Digest(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { catalog := pkg.NewCollection(test.pkg) - pkgs := toPackages(catalog, s) + pkgs := toPackages(relationships.NewIndex(nil), catalog, s) require.Len(t, pkgs, 1) for _, p := range pkgs { if test.expectedDigest == "" { From a731f47e5b281b468de7c5f89dab1d9fc8b92f91 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Mon, 22 Jul 2024 18:03:04 -0400 Subject: [PATCH 3/5] chore: refactor relationship.Index implementation Signed-off-by: Keith Zantow --- .../package_binary_elf_relationships_test.go | 6 +- .../binary/binary_dependencies.go | 35 +- internal/relationship/index.go | 194 +++++++--- internal/relationship/index_test.go | 358 ++++++++---------- .../relationships/indexed_relationships.go | 123 ------ .../indexed_relationships_test.go | 74 ---- .../common/spdxhelpers/to_format_model.go | 10 +- .../spdxhelpers/to_format_model_test.go | 4 +- syft/pkg/cataloger/python/dependency.go | 8 +- syft/sbom/sbom.go | 6 +- 10 files changed, 338 insertions(+), 480 deletions(-) delete mode 100644 internal/relationships/indexed_relationships.go delete mode 100644 internal/relationships/indexed_relationships_test.go diff --git a/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go b/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go index a088a6b4db9..a76a30e1e5d 100644 --- a/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go +++ b/cmd/syft/internal/test/integration/package_binary_elf_relationships_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/anchore/syft/internal/relationships" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/source" ) @@ -45,13 +45,13 @@ func TestBinaryElfRelationships(t *testing.T) { } } - relationshipIndex := relationships.NewIndex(sbom.Relationships) + relationshipIndex := relationship.NewIndex(sbom.Relationships...) for name, expectedDepNames := range expectedGraph { pkgId := nameToId[name] p := sbom.Artifacts.Packages.Package(pkgId) require.NotNil(t, p, "expected package %q to be present in the SBOM", name) - rels := relationshipIndex.ToAndFrom(*p, artifact.DependencyOfRelationship) + rels := relationshipIndex.References(*p, artifact.DependencyOfRelationship) require.NotEmpty(t, rels, "expected package %q to have relationships", name) toIds := map[artifact.ID]struct{}{} diff --git a/internal/relationship/binary/binary_dependencies.go b/internal/relationship/binary/binary_dependencies.go index e30b64282e6..b23b42b9a42 100644 --- a/internal/relationship/binary/binary_dependencies.go +++ b/internal/relationship/binary/binary_dependencies.go @@ -21,27 +21,22 @@ func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Access // 3. craft package-to-package relationships for each binary that represent shared library dependencies //note: we only care about package-to-package relationships - var relIndex *relationship.Index - accessor.ReadFromSBOM(func(s *sbom.SBOM) { - relIndex = relationship.NewIndex(s.Relationships...) - }) - - return generateRelationships(resolver, accessor, index, relIndex) + return generateRelationships(resolver, accessor, index) } -func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationship.Index) []artifact.Relationship { - // read all existing dependencyOf relationships +func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex) []artifact.Relationship { + newRelationships := relationship.NewIndex() + + // find all package-to-package relationships for shared library dependencies accessor.ReadFromSBOM(func(s *sbom.SBOM) { - for _, r := range s.Relationships { - if r.Type != artifact.DependencyOfRelationship { - continue + relIndex := relationship.NewIndex(s.Relationships...) + + addRelationship := func(r artifact.Relationship) { + if !relIndex.Contains(r) { + newRelationships.Add(r) } - relIndex.Track(r) } - }) - // find all package-to-package relationships for shared library dependencies - accessor.ReadFromSBOM(func(s *sbom.SBOM) { for _, parentPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) { for _, evidentLocation := range parentPkg.Locations.ToSlice() { if evidentLocation.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation { @@ -54,12 +49,12 @@ func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, i continue } - populateRelationships(exec, parentPkg, resolver, relIndex, index) + populateRelationships(exec, parentPkg, resolver, addRelationship, index) } } }) - return relIndex.NewRelationships() + return newRelationships.All() } // PackagesToRemove returns a list of binary packages (resolved by the ELF cataloger) that should be removed from the SBOM @@ -147,7 +142,7 @@ func getBinaryPackagesToDelete(resolver file.Resolver, s *sbom.SBOM) []artifact. return pkgsToDelete } -func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationship.Index, index *sharedLibraryIndex) { +func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, addRelationship func(artifact.Relationship), index *sharedLibraryIndex) { for _, libReference := range exec.ImportedLibraries { // for each library reference, check s.Artifacts.Packages.Sorted(pkg.BinaryPkg) for a binary package that represents that library // if found, create a relationship between the parent package and the library package @@ -167,7 +162,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver realBaseName := path.Base(loc.RealPath) pkgCollection := index.owningLibraryPackage(realBaseName) if pkgCollection.PackageCount() < 1 { - relIndex.Add( + addRelationship( artifact.Relationship{ From: loc.Coordinates, To: parentPkg, @@ -176,7 +171,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver ) } for _, p := range pkgCollection.Sorted() { - relIndex.Add( + addRelationship( artifact.Relationship{ From: p, To: parentPkg, diff --git a/internal/relationship/index.go b/internal/relationship/index.go index e992116065b..bda5dadb181 100644 --- a/internal/relationship/index.go +++ b/internal/relationship/index.go @@ -1,88 +1,182 @@ package relationship import ( - "github.com/scylladb/go-set/strset" + "slices" + "strings" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" ) +// Index provides an indexed set of relationships for easy location and comparison type Index struct { - typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set - existing []artifact.Relationship - additional []artifact.Relationship + all []*sortableRelationship + byFromID map[artifact.ID]*mapped + byToID map[artifact.ID]*mapped } -func NewIndex(existing ...artifact.Relationship) *Index { - r := &Index{ - typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set), - } - r.TrackAll(existing...) - return r +// NewIndex returns a new relationship Index +func NewIndex(relationships ...artifact.Relationship) *Index { + out := Index{} + out.Add(relationships...) + return &out } -func (i *Index) track(r artifact.Relationship) bool { - fromID := r.From.ID() - if _, ok := i.typesByFromTo[fromID]; !ok { - i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set) +// Add adds all the given relationships to the index, without adding duplicates +func (i *Index) Add(relationships ...artifact.Relationship) { + if i.byFromID == nil { + i.byFromID = map[artifact.ID]*mapped{} } - - toID := r.To.ID() - if _, ok := i.typesByFromTo[fromID][toID]; !ok { - i.typesByFromTo[fromID][toID] = strset.New() + if i.byToID == nil { + i.byToID = map[artifact.ID]*mapped{} } - var exists bool - if i.typesByFromTo[fromID][toID].Has(string(r.Type)) { - exists = true + // store appropriate indexes for stable ordering to minimize ID() calls + for _, r := range relationships { + // prevent duplicates + if i.Contains(r) { + continue + } + + fromID := r.From.ID() + toID := r.To.ID() + + m := i.byFromID[fromID] + if m == nil { + m = &mapped{} + i.byFromID[fromID] = m + } + sortableFrom := &sortableRelationship{ + from: fromID, + to: toID, + rel: r, + } + + // add to all relationships + i.all = append(i.all, sortableFrom) + + // add to from -> to mapping + m.add(toID, sortableFrom) + + m = i.byToID[toID] + if m == nil { + m = &mapped{} + i.byToID[toID] = m + } + + // add to the to -> from mapping + m.add(fromID, sortableFrom) } +} - i.typesByFromTo[fromID][toID].Add(string(r.Type)) +// From returns all relationships from the given identifiable, with specified types +func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + return fromMapped(i.byFromID, identifiable, types) +} + +// To returns all relationships to the given identifiable, with specified types +func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + return fromMapped(i.byToID, identifiable, types) +} - return !exists +// References returns all relationships that references to or from the given identifiable +func (i *Index) References(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { + return append(i.To(identifiable, types...), i.From(identifiable, types...)...) } -// Track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added). -// returns true if the relationship is new to the index, false otherwise. -func (i *Index) Track(r artifact.Relationship) bool { - unique := i.track(r) - if unique { - i.existing = append(i.existing, r) +// Coordinates returns all coordinates for the provided identifiable for provided relationship types +// If no types are provided, all relationship types are considered. +func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates { + var coordinates []file.Coordinates + for _, relationship := range append(i.To(identifiable, types...), i.From(identifiable, types...)...) { + cords := extractCoordinates(relationship) + coordinates = append(coordinates, cords...) } - return unique + return coordinates } -// Add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate). -func (i *Index) Add(r artifact.Relationship) bool { - if i.track(r) { - i.additional = append(i.additional, r) - return true +// Contains indicates the relationship is present in this index +func (i *Index) Contains(r artifact.Relationship) bool { + if m := i.byFromID[r.From.ID()]; m != nil { + if ids := m.types[(r.Type)]; ids != nil { + return ids[(r.To.ID())] != nil + } } return false } -func (i *Index) TrackAll(rs ...artifact.Relationship) { - for _, r := range rs { - i.Track(r) +// All returns a sorted set of relationships matching all types, or all relationships if no types specified +func (i *Index) All(types ...artifact.RelationshipType) []artifact.Relationship { + return collect(i.all, types) +} + +func fromMapped(mappedIDs map[artifact.ID]*mapped, identifiable artifact.Identifiable, types []artifact.RelationshipType) []artifact.Relationship { + if identifiable == nil { + return nil + } + m := mappedIDs[identifiable.ID()] + if m == nil { + return nil } + return collect(m.rels, types) } -func (i *Index) AddAll(rs ...artifact.Relationship) { - for _, r := range rs { - i.Add(r) +func collect(rels []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship { + // always return sorted lists for SBOM stability; the sorting could be handled elsewhere + slices.SortFunc(rels, sortFunc) + var out []artifact.Relationship + for _, r := range rels { + if len(types) == 0 || slices.Contains(types, r.rel.Type) { + out = append(out, r.rel) + } } + return out } -func (i *Index) NewRelationships() []artifact.Relationship { - return i.additional +func extractCoordinates(relationship artifact.Relationship) (results []file.Coordinates) { + if coordinates, exists := relationship.From.(file.Coordinates); exists { + results = append(results, coordinates) + } + + if coordinates, exists := relationship.To.(file.Coordinates); exists { + results = append(results, coordinates) + } + + return results +} + +type sortableRelationship struct { + from artifact.ID + to artifact.ID + rel artifact.Relationship +} + +func sortFunc(a, b *sortableRelationship) int { + cmp := strings.Compare(string(a.rel.Type), string(b.rel.Type)) + if cmp != 0 { + return cmp + } + cmp = strings.Compare(string(a.from), string(b.from)) + if cmp != 0 { + return cmp + } + return strings.Compare(string(a.to), string(b.to)) } -func (i *Index) ExistingRelationships() []artifact.Relationship { - return i.existing +type mapped struct { + types map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship + rels []*sortableRelationship } -func (i *Index) AllUniqueRelationships() []artifact.Relationship { - var all []artifact.Relationship - all = append(all, i.existing...) - all = append(all, i.additional...) - return all +func (m *mapped) add(id artifact.ID, r *sortableRelationship) { + m.rels = append(m.rels, r) + if m.types == nil { + m.types = map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship{} + } + tm := m.types[(r.rel.Type)] + if tm == nil { + tm = map[artifact.ID]*sortableRelationship{} + m.types[(r.rel.Type)] = tm + } + tm[id] = r } diff --git a/internal/relationship/index_test.go b/internal/relationship/index_test.go index b1acbc21463..ae4004ee63f 100644 --- a/internal/relationship/index_test.go +++ b/internal/relationship/index_test.go @@ -3,223 +3,187 @@ package relationship import ( "testing" - "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" ) -func Test_newRelationshipIndex(t *testing.T) { - from := fakeIdentifiable{id: "from"} - to := fakeIdentifiable{id: "to"} - tests := []struct { - name string - given []artifact.Relationship - track []artifact.Relationship - add []artifact.Relationship - wantExisting []string - wantAdditional []string - }{ - { - name: "empty", - }, - { - name: "tracks existing relationships", - given: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - wantExisting: []string{"from [evident-by] to"}, - }, - { - name: "deduplicate tracked relationships", - given: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - track: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - wantExisting: []string{"from [evident-by] to"}, - }, - { - name: "deduplicate any input relationships", - given: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - track: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - add: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - wantExisting: []string{"from [evident-by] to"}, - }, - { - name: "deduplicate any added relationships", - add: []artifact.Relationship{ - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - { - From: from, - To: to, - Type: artifact.EvidentByRelationship, - }, - }, - wantAdditional: []string{"from [evident-by] to"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - idx := NewIndex(tt.given...) - idx.TrackAll(tt.track...) - idx.AddAll(tt.add...) - diffRelationships(t, tt.wantExisting, idx.existing) - diffRelationships(t, tt.wantAdditional, idx.additional) - }) +func Test_Index(t *testing.T) { + p1 := pkg.Package{ + Name: "pkg-1", + } + p2 := pkg.Package{ + Name: "pkg-2", + } + p3 := pkg.Package{ + Name: "pkg-3", + } + c1 := file.Coordinates{ + RealPath: "/coords/1", + } + c2 := file.Coordinates{ + RealPath: "/coords/2", } -} -func diffRelationships(t *testing.T, expected []string, actual []artifact.Relationship) { - if d := cmp.Diff(expected, stringRelationships(actual)); d != "" { - t.Errorf("unexpected relationships (-want, +got): %s", d) + for _, p := range []*pkg.Package{&p1, &p2, &p3} { + p.SetID() + } + + r1 := artifact.Relationship{ + From: p1, + To: p2, + Type: artifact.DependencyOfRelationship, + } + r2 := artifact.Relationship{ + From: p1, + To: p3, + Type: artifact.DependencyOfRelationship, + } + r3 := artifact.Relationship{ + From: p1, + To: c1, + Type: artifact.ContainsRelationship, + } + r4 := artifact.Relationship{ + From: p2, + To: c2, + Type: artifact.ContainsRelationship, + } + r5 := artifact.Relationship{ + From: p3, + To: c2, + Type: artifact.ContainsRelationship, } -} -func stringRelationships(relationships []artifact.Relationship) []string { - var result []string - for _, r := range relationships { - result = append(result, string(r.From.ID())+" ["+string(r.Type)+"] "+string(r.To.ID())) + dup := artifact.Relationship{ + From: p3, + To: c2, + Type: artifact.ContainsRelationship, } - return result + idx := NewIndex(r1, r2, r3, r4, r5, dup) + require.ElementsMatch(t, slice(r3, r4, r5, r2, r1), idx.All()) + + require.ElementsMatch(t, slice(r1, r4), idx.References(p2)) + require.ElementsMatch(t, slice(r4), idx.References(p2, artifact.ContainsRelationship)) + + require.ElementsMatch(t, slice(r1), idx.To(p2)) + require.ElementsMatch(t, []artifact.Relationship(nil), idx.To(p2, artifact.ContainsRelationship)) + + require.ElementsMatch(t, slice(r4), idx.From(p2)) + require.ElementsMatch(t, slice(r4), idx.From(p2, artifact.ContainsRelationship)) } -func Test_relationshipIndex_track(t *testing.T) { - from := fakeIdentifiable{id: "from"} - to := fakeIdentifiable{id: "to"} - relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship} - tests := []struct { - name string - existing []artifact.Relationship - given artifact.Relationship - want bool - }{ - { - name: "track returns true for a new relationship", - existing: []artifact.Relationship{}, - given: relationship, - want: true, - }, - { - name: "track returns false for an existing relationship", - existing: []artifact.Relationship{relationship}, - given: relationship, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - i := NewIndex(tt.existing...) - if got := i.Track(tt.given); got != tt.want { - t.Errorf("track() = %v, want %v", got, tt.want) - } - }) +func Test_sortOrder(t *testing.T) { + r1 := artifact.Relationship{ + From: fakeIdentifiable{"1"}, + To: fakeIdentifiable{"2"}, + Type: artifact.ContainsRelationship, + } + r2 := artifact.Relationship{ + From: fakeIdentifiable{"2"}, + To: fakeIdentifiable{"3"}, + Type: artifact.ContainsRelationship, + } + r3 := artifact.Relationship{ + From: fakeIdentifiable{"3"}, + To: fakeIdentifiable{"4"}, + Type: artifact.ContainsRelationship, + } + r4 := artifact.Relationship{ + From: fakeIdentifiable{"1"}, + To: fakeIdentifiable{"2"}, + Type: artifact.DependencyOfRelationship, + } + r5 := artifact.Relationship{ + From: fakeIdentifiable{"2"}, + To: fakeIdentifiable{"3"}, + Type: artifact.DependencyOfRelationship, + } + dup := artifact.Relationship{ + From: fakeIdentifiable{"2"}, + To: fakeIdentifiable{"3"}, + Type: artifact.DependencyOfRelationship, } + + // should have a stable sort order when retrieving elements + idx := NewIndex(r1, r2, r3, r4, r5, dup) + require.ElementsMatch(t, slice(r3, r4, r5, r2, r1), idx.All()) + + require.ElementsMatch(t, slice(r4, r1), idx.From(fakeIdentifiable{"1"})) } -func Test_relationshipIndex_add(t *testing.T) { - from := fakeIdentifiable{id: "from"} - to := fakeIdentifiable{id: "to"} - relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship} - tests := []struct { - name string - existing []artifact.Relationship - given artifact.Relationship - want bool - }{ - { - name: "add returns true for a new relationship", - existing: []artifact.Relationship{}, - given: relationship, - want: true, - }, - { - name: "add returns false for an existing relationship", - existing: []artifact.Relationship{relationship}, - given: relationship, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - i := NewIndex(tt.existing...) - if got := i.Add(tt.given); got != tt.want { - t.Errorf("add() = %v, want %v", got, tt.want) - } - }) +func Test_Coordinates(t *testing.T) { + p1 := pkg.Package{ + Name: "pkg-1", + } + p2 := pkg.Package{ + Name: "pkg-2", + } + p3 := pkg.Package{ + Name: "pkg-3", + } + c1 := file.Coordinates{ + RealPath: "/coords/1", + } + c2 := file.Coordinates{ + RealPath: "/coords/2", + } + + for _, p := range []*pkg.Package{&p1, &p2, &p3} { + p.SetID() } + r1 := artifact.Relationship{ + From: p1, + To: p2, + Type: artifact.DependencyOfRelationship, + } + r2 := artifact.Relationship{ + From: p1, + To: p3, + Type: artifact.DependencyOfRelationship, + } + r3 := artifact.Relationship{ + From: p1, + To: c1, + Type: artifact.ContainsRelationship, + } + r4 := artifact.Relationship{ + From: p2, + To: c2, + Type: artifact.ContainsRelationship, + } + r5 := artifact.Relationship{ + From: p3, + To: c1, + Type: artifact.ContainsRelationship, + } + r6 := artifact.Relationship{ + From: p3, + To: c2, + Type: artifact.ContainsRelationship, + } + + idx := NewIndex(r1, r2, r3, r4, r5, r6) + + got := idx.Coordinates(p1) + require.ElementsMatch(t, slice(c1), got) + + got = idx.Coordinates(p3) + require.ElementsMatch(t, slice(c1, c2), got) } type fakeIdentifiable struct { - id string + value string +} + +func (i fakeIdentifiable) ID() artifact.ID { + return artifact.ID(i.value) } -func (f fakeIdentifiable) ID() artifact.ID { - return artifact.ID(f.id) +func slice[T any](values ...T) []T { + return values } diff --git a/internal/relationships/indexed_relationships.go b/internal/relationships/indexed_relationships.go deleted file mode 100644 index cffb90cad8b..00000000000 --- a/internal/relationships/indexed_relationships.go +++ /dev/null @@ -1,123 +0,0 @@ -package relationships - -import ( - "slices" - "strings" - - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" -) - -type Index struct { - byFromID map[artifact.ID][]artifact.Relationship - byToID map[artifact.ID][]artifact.Relationship -} - -// NewIndex returns a new relationship Index -func NewIndex(relationships []artifact.Relationship) *Index { - out := &Index{ - byFromID: map[artifact.ID][]artifact.Relationship{}, - byToID: map[artifact.ID][]artifact.Relationship{}, - } - - fromIDs := map[artifact.ID][]sortableRelationship{} - toIDs := map[artifact.ID][]sortableRelationship{} - - // store appropriate indexes for stable ordering to minimize ID() calls - for _, r := range relationships { - fromID := r.From.ID() - toID := r.To.ID() - - fromIDs[fromID] = append(fromIDs[fromID], sortableRelationship{string(toID), r}) - out.byToID[toID] = append(out.byToID[toID], r) - } - - for k, v := range fromIDs { - out.byFromID[k] = collect(v) - } - - for k, v := range toIDs { - out.byToID[k] = collect(v) - } - - return out -} - -// From returns all relationships from the given identifiable type -func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - if identifiable == nil { - return nil - } - var out []artifact.Relationship - for _, r := range i.byFromID[identifiable.ID()] { - if len(types) == 0 || slices.Contains(types, r.Type) { - out = append(out, r) - } - } - return out -} - -// To returns all relationships to the given identifiable type -func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - if identifiable == nil { - return nil - } - var out []artifact.Relationship - for _, r := range i.byToID[identifiable.ID()] { - if len(types) == 0 || slices.Contains(types, r.Type) { - out = append(out, r) - } - } - return out -} - -// ToAndFrom returns all relationships to or from the given identifiable type -func (i *Index) ToAndFrom(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - return append(i.To(identifiable, types...), i.From(identifiable, types...)...) -} - -// Coordinates returns all coordinates for the provided identifiable for provided relationship types -// If no types are provided, all relationship types are considered. -func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates { - var coordinates []file.Coordinates - for _, relationship := range i.ToAndFrom(identifiable, types...) { - cords := extractCoordinates(relationship) - coordinates = append(coordinates, cords...) - } - return coordinates -} - -func extractCoordinates(relationship artifact.Relationship) (results []file.Coordinates) { - if coordinates, exists := relationship.From.(file.Coordinates); exists { - results = append(results, coordinates) - } - - if coordinates, exists := relationship.To.(file.Coordinates); exists { - results = append(results, coordinates) - } - - return results -} - -type sortableRelationship struct { - key string - rel artifact.Relationship -} - -func collect(relationships []sortableRelationship) []artifact.Relationship { - slices.SortFunc(relationships, sortFunc) - - var out []artifact.Relationship - for _, rel := range relationships { - out = append(out, rel.rel) - } - return out -} - -func sortFunc(a, b sortableRelationship) int { - cmp := strings.Compare(string(a.rel.Type), string(b.rel.Type)) - if cmp != 0 { - return cmp - } - return strings.Compare(a.key, b.key) -} diff --git a/internal/relationships/indexed_relationships_test.go b/internal/relationships/indexed_relationships_test.go deleted file mode 100644 index a82b381c59c..00000000000 --- a/internal/relationships/indexed_relationships_test.go +++ /dev/null @@ -1,74 +0,0 @@ -package relationships - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/pkg" -) - -func Test_Index(t *testing.T) { - p1 := pkg.Package{ - Name: "pkg-1", - } - p2 := pkg.Package{ - Name: "pkg-2", - } - p3 := pkg.Package{ - Name: "pkg-3", - } - c1 := file.Coordinates{ - RealPath: "/coords/1", - } - c2 := file.Coordinates{ - RealPath: "/coords/2", - } - - for _, p := range []*pkg.Package{&p1, &p2, &p3} { - p.SetID() - } - - r1 := artifact.Relationship{ - From: p1, - To: p2, - Type: artifact.DependencyOfRelationship, - } - r2 := artifact.Relationship{ - From: p1, - To: p3, - Type: artifact.DependencyOfRelationship, - } - r3 := artifact.Relationship{ - From: p1, - To: c1, - Type: artifact.ContainsRelationship, - } - r4 := artifact.Relationship{ - From: p2, - To: c2, - Type: artifact.ContainsRelationship, - } - r5 := artifact.Relationship{ - From: p3, - To: c2, - Type: artifact.ContainsRelationship, - } - - idx := NewIndex([]artifact.Relationship{r1, r2, r3, r4, r5}) - - require.EqualValues(t, idx.ToAndFrom(p2), rels(r1, r4)) - require.EqualValues(t, idx.ToAndFrom(p2, artifact.ContainsRelationship), rels(r4)) - - require.EqualValues(t, idx.To(p2), rels(r1)) - require.EqualValues(t, idx.To(p2, artifact.ContainsRelationship), rels()) - - require.EqualValues(t, idx.From(p2), rels(r4)) - require.EqualValues(t, idx.From(p2, artifact.ContainsRelationship), rels(r4)) -} - -func rels(values ...artifact.Relationship) []artifact.Relationship { - return values -} diff --git a/syft/format/common/spdxhelpers/to_format_model.go b/syft/format/common/spdxhelpers/to_format_model.go index 83ef123f438..3fd136e84df 100644 --- a/syft/format/common/spdxhelpers/to_format_model.go +++ b/syft/format/common/spdxhelpers/to_format_model.go @@ -16,7 +16,7 @@ import ( "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/mimetype" - "github.com/anchore/syft/internal/relationships" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/internal/spdxlicense" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -46,10 +46,10 @@ const ( func ToFormatModel(s sbom.SBOM) *spdx.Document { name, namespace := helpers.DocumentNameAndNamespace(s.Source, s.Descriptor) - rels := relationships.NewIndex(s.Relationships) + rels := relationship.NewIndex(s.Relationships...) packages := toPackages(rels, s.Artifacts.Packages, s) - allRelationships := toRelationships(s.RelationshipsSorted()) + allRelationships := toRelationships(rels.All()) // for valid SPDX we need a document describes relationship describesID := spdx.ElementID("DOCUMENT") @@ -304,7 +304,7 @@ func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID { // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/) // //nolint:funlen -func toPackages(rels *relationships.Index, catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) { +func toPackages(rels *relationship.Index, catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) { for _, p := range catalog.Sorted() { // name should be guaranteed to be unique, but semantically useful and stable id := toSPDXID(p) @@ -746,7 +746,7 @@ func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense { // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */ // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field // the above link contains the SPDX algorithm for a package verification code -func newPackageVerificationCode(rels *relationships.Index, p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode { +func newPackageVerificationCode(rels *relationship.Index, p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode { // key off of the contains relationship; // spdx validator will fail if a package claims to contain a file but no sha1 provided // if a sha1 for a file is provided then the validator will fail if the package does not have diff --git a/syft/format/common/spdxhelpers/to_format_model_test.go b/syft/format/common/spdxhelpers/to_format_model_test.go index d5d19d416b1..e986069bb3a 100644 --- a/syft/format/common/spdxhelpers/to_format_model_test.go +++ b/syft/format/common/spdxhelpers/to_format_model_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/anchore/syft/internal/relationships" + "github.com/anchore/syft/internal/relationship" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/format/internal/spdxutil/helpers" @@ -666,7 +666,7 @@ func Test_H1Digest(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { catalog := pkg.NewCollection(test.pkg) - pkgs := toPackages(relationships.NewIndex(nil), catalog, s) + pkgs := toPackages(relationship.NewIndex(), catalog, s) require.Len(t, pkgs, 1) for _, p := range pkgs { if test.expectedDigest == "" { diff --git a/syft/pkg/cataloger/python/dependency.go b/syft/pkg/cataloger/python/dependency.go index 7479649d062..6110281e902 100644 --- a/syft/pkg/cataloger/python/dependency.go +++ b/syft/pkg/cataloger/python/dependency.go @@ -167,7 +167,7 @@ func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []p if err != nil { return nil, nil, fmt.Errorf("failed to resolve relationships for global site package %q: %w", globalSitePackage, err) } - relationshipIndex.AddAll(siteRels...) + relationshipIndex.Add(siteRels...) } // create relationships between packages within each virtual env site package directory (that doesn't link to a global site-packages directory) @@ -180,7 +180,7 @@ func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []p if err != nil { return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv site package %q: %w", venv.SitePackagesPath, err) } - relationshipIndex.AddAll(siteRels...) + relationshipIndex.Add(siteRels...) } // create relationships between packages within each virtual env site package directory (that links to a global site package directory) @@ -197,10 +197,10 @@ func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []p return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv + global site package path %q + %q: %w", venv.SitePackagesPath, globalSitePackage, err) } - relationshipIndex.AddAll(siteRels...) + relationshipIndex.Add(siteRels...) } - return pkgs, relationshipIndex.AllUniqueRelationships(), err + return pkgs, relationshipIndex.All(), err } func collectPackages(pkgsBySitePackageAndName map[string]map[string]pkg.Package, sites []string) []pkg.Package { diff --git a/syft/sbom/sbom.go b/syft/sbom/sbom.go index 7c29457343e..ba3f95f3d46 100644 --- a/syft/sbom/sbom.go +++ b/syft/sbom/sbom.go @@ -35,6 +35,8 @@ type Descriptor struct { Configuration interface{} } +// RelationshipsSorted returns a sorted slice of all relationships +// Deprecated -- use relationship.Index func (s SBOM) RelationshipsSorted() []artifact.Relationship { relationships := s.Relationships sort.SliceStable(relationships, func(i, j int) bool { @@ -70,7 +72,7 @@ func (s SBOM) AllCoordinates() []file.Coordinates { // RelationshipsForPackage returns all relationships for the provided types. // If no types are provided, all relationships for the package are returned. -// Deprecated -- use relationships.Index +// Deprecated -- use relationship.Index func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.RelationshipType) []artifact.Relationship { if len(rt) == 0 { rt = artifact.AllRelationshipTypes() @@ -104,7 +106,7 @@ func (s SBOM) RelationshipsForPackage(p pkg.Package, rt ...artifact.Relationship // CoordinatesForPackage returns all coordinates for the provided package for provided relationship types // If no types are provided, all relationship types are considered. -// Deprecated -- use relationships.Index +// Deprecated -- use relationship.Index func (s SBOM) CoordinatesForPackage(p pkg.Package, rt ...artifact.RelationshipType) []file.Coordinates { var coordinates []file.Coordinates for _, relationship := range s.RelationshipsForPackage(p, rt...) { From 938c341c500f23322ea9d94a70bd800cd5615865 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Mon, 22 Jul 2024 21:29:01 -0400 Subject: [PATCH 4/5] chore: update tests Signed-off-by: Keith Zantow --- .../binary/binary_dependencies_test.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/internal/relationship/binary/binary_dependencies_test.go b/internal/relationship/binary/binary_dependencies_test.go index 8696b82b476..33c6436c351 100644 --- a/internal/relationship/binary/binary_dependencies_test.go +++ b/internal/relationship/binary/binary_dependencies_test.go @@ -2,6 +2,7 @@ package binary import ( "path" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -328,7 +329,20 @@ func relationshipComparer(x, y []artifact.Relationship) string { artifact.Relationship{}, file.LocationSet{}, pkg.LicenseSet{}, - )) + ), cmpopts.SortSlices(lessRelationships)) +} + +func lessRelationships(r1, r2 artifact.Relationship) bool { + c := strings.Compare(string(r1.Type), string(r2.Type)) + if c != 0 { + return c < 0 + } + c = strings.Compare(string(r1.From.ID()), string(r2.From.ID())) + if c != 0 { + return c < 0 + } + c = strings.Compare(string(r1.To.ID()), string(r2.To.ID())) + return c < 0 } func newAccessor(pkgs []pkg.Package, coordinateIndex map[file.Coordinates]file.Executable, preexistingRelationships []artifact.Relationship) sbomsync.Accessor { From 7441c1d3149fe3e763e0f030e9a5f70de23a073c Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Tue, 23 Jul 2024 04:17:46 -0400 Subject: [PATCH 5/5] chore: minor renaming Signed-off-by: Keith Zantow --- internal/relationship/index.go | 137 ++++++++++++++-------------- internal/relationship/index_test.go | 102 +++++++++++++++------ 2 files changed, 141 insertions(+), 98 deletions(-) diff --git a/internal/relationship/index.go b/internal/relationship/index.go index bda5dadb181..a94c84f8537 100644 --- a/internal/relationship/index.go +++ b/internal/relationship/index.go @@ -8,11 +8,11 @@ import ( "github.com/anchore/syft/syft/file" ) -// Index provides an indexed set of relationships for easy location and comparison +// Index indexes relationships, preventing duplicates type Index struct { - all []*sortableRelationship - byFromID map[artifact.ID]*mapped - byToID map[artifact.ID]*mapped + all []*sortableRelationship + fromID map[artifact.ID]*mappedRelationships + toID map[artifact.ID]*mappedRelationships } // NewIndex returns a new relationship Index @@ -24,11 +24,11 @@ func NewIndex(relationships ...artifact.Relationship) *Index { // Add adds all the given relationships to the index, without adding duplicates func (i *Index) Add(relationships ...artifact.Relationship) { - if i.byFromID == nil { - i.byFromID = map[artifact.ID]*mapped{} + if i.fromID == nil { + i.fromID = map[artifact.ID]*mappedRelationships{} } - if i.byToID == nil { - i.byToID = map[artifact.ID]*mapped{} + if i.toID == nil { + i.toID = map[artifact.ID]*mappedRelationships{} } // store appropriate indexes for stable ordering to minimize ID() calls @@ -41,54 +41,53 @@ func (i *Index) Add(relationships ...artifact.Relationship) { fromID := r.From.ID() toID := r.To.ID() - m := i.byFromID[fromID] - if m == nil { - m = &mapped{} - i.byFromID[fromID] = m - } - sortableFrom := &sortableRelationship{ - from: fromID, - to: toID, - rel: r, + relationship := &sortableRelationship{ + from: fromID, + to: toID, + relationship: r, } // add to all relationships - i.all = append(i.all, sortableFrom) - - // add to from -> to mapping - m.add(toID, sortableFrom) + i.all = append(i.all, relationship) - m = i.byToID[toID] - if m == nil { - m = &mapped{} - i.byToID[toID] = m + // add from -> to mapping + mapped := i.fromID[fromID] + if mapped == nil { + mapped = &mappedRelationships{} + i.fromID[fromID] = mapped } + mapped.add(toID, relationship) - // add to the to -> from mapping - m.add(fromID, sortableFrom) + // add to -> from mapping + mapped = i.toID[toID] + if mapped == nil { + mapped = &mappedRelationships{} + i.toID[toID] = mapped + } + mapped.add(fromID, relationship) } } // From returns all relationships from the given identifiable, with specified types func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - return fromMapped(i.byFromID, identifiable, types) + return toSortedSlice(fromMapped(i.fromID, identifiable), types) } // To returns all relationships to the given identifiable, with specified types func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - return fromMapped(i.byToID, identifiable, types) + return toSortedSlice(fromMapped(i.toID, identifiable), types) } -// References returns all relationships that references to or from the given identifiable +// References returns all relationships that reference to or from the given identifiable func (i *Index) References(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship { - return append(i.To(identifiable, types...), i.From(identifiable, types...)...) + return toSortedSlice(append(fromMapped(i.fromID, identifiable), fromMapped(i.toID, identifiable)...), types) } // Coordinates returns all coordinates for the provided identifiable for provided relationship types // If no types are provided, all relationship types are considered. func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates { var coordinates []file.Coordinates - for _, relationship := range append(i.To(identifiable, types...), i.From(identifiable, types...)...) { + for _, relationship := range i.References(identifiable, types...) { cords := extractCoordinates(relationship) coordinates = append(coordinates, cords...) } @@ -97,9 +96,9 @@ func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifac // Contains indicates the relationship is present in this index func (i *Index) Contains(r artifact.Relationship) bool { - if m := i.byFromID[r.From.ID()]; m != nil { - if ids := m.types[(r.Type)]; ids != nil { - return ids[(r.To.ID())] != nil + if mapped := i.fromID[r.From.ID()]; mapped != nil { + if ids := mapped.typeMap[r.Type]; ids != nil { + return ids[r.To.ID()] != nil } } return false @@ -107,27 +106,27 @@ func (i *Index) Contains(r artifact.Relationship) bool { // All returns a sorted set of relationships matching all types, or all relationships if no types specified func (i *Index) All(types ...artifact.RelationshipType) []artifact.Relationship { - return collect(i.all, types) + return toSortedSlice(i.all, types) } -func fromMapped(mappedIDs map[artifact.ID]*mapped, identifiable artifact.Identifiable, types []artifact.RelationshipType) []artifact.Relationship { - if identifiable == nil { +func fromMapped(idMap map[artifact.ID]*mappedRelationships, identifiable artifact.Identifiable) []*sortableRelationship { + if identifiable == nil || idMap == nil { return nil } - m := mappedIDs[identifiable.ID()] - if m == nil { + mapped := idMap[identifiable.ID()] + if mapped == nil { return nil } - return collect(m.rels, types) + return mapped.allRelated } -func collect(rels []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship { - // always return sorted lists for SBOM stability; the sorting could be handled elsewhere - slices.SortFunc(rels, sortFunc) +func toSortedSlice(relationships []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship { + // always return sorted for SBOM stability + slices.SortFunc(relationships, sortFunc) var out []artifact.Relationship - for _, r := range rels { - if len(types) == 0 || slices.Contains(types, r.rel.Type) { - out = append(out, r.rel) + for _, r := range relationships { + if len(types) == 0 || slices.Contains(types, r.relationship.Type) { + out = append(out, r.relationship) } } return out @@ -145,14 +144,32 @@ func extractCoordinates(relationship artifact.Relationship) (results []file.Coor return results } +type mappedRelationships struct { + typeMap map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship + allRelated []*sortableRelationship +} + +func (m *mappedRelationships) add(id artifact.ID, newRelationship *sortableRelationship) { + m.allRelated = append(m.allRelated, newRelationship) + if m.typeMap == nil { + m.typeMap = map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship{} + } + typeMap := m.typeMap[newRelationship.relationship.Type] + if typeMap == nil { + typeMap = map[artifact.ID]*sortableRelationship{} + m.typeMap[newRelationship.relationship.Type] = typeMap + } + typeMap[id] = newRelationship +} + type sortableRelationship struct { - from artifact.ID - to artifact.ID - rel artifact.Relationship + from artifact.ID + to artifact.ID + relationship artifact.Relationship } func sortFunc(a, b *sortableRelationship) int { - cmp := strings.Compare(string(a.rel.Type), string(b.rel.Type)) + cmp := strings.Compare(string(a.relationship.Type), string(b.relationship.Type)) if cmp != 0 { return cmp } @@ -162,21 +179,3 @@ func sortFunc(a, b *sortableRelationship) int { } return strings.Compare(string(a.to), string(b.to)) } - -type mapped struct { - types map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship - rels []*sortableRelationship -} - -func (m *mapped) add(id artifact.ID, r *sortableRelationship) { - m.rels = append(m.rels, r) - if m.types == nil { - m.types = map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship{} - } - tm := m.types[(r.rel.Type)] - if tm == nil { - tm = map[artifact.ID]*sortableRelationship{} - m.types[(r.rel.Type)] = tm - } - tm[id] = r -} diff --git a/internal/relationship/index_test.go b/internal/relationship/index_test.go index ae4004ee63f..1f4e66c27af 100644 --- a/internal/relationship/index_test.go +++ b/internal/relationship/index_test.go @@ -64,7 +64,7 @@ func Test_Index(t *testing.T) { } idx := NewIndex(r1, r2, r3, r4, r5, dup) - require.ElementsMatch(t, slice(r3, r4, r5, r2, r1), idx.All()) + require.ElementsMatch(t, slice(r1, r2, r3, r4, r5), idx.All()) require.ElementsMatch(t, slice(r1, r4), idx.References(p2)) require.ElementsMatch(t, slice(r4), idx.References(p2, artifact.ContainsRelationship)) @@ -78,41 +78,71 @@ func Test_Index(t *testing.T) { func Test_sortOrder(t *testing.T) { r1 := artifact.Relationship{ - From: fakeIdentifiable{"1"}, - To: fakeIdentifiable{"2"}, - Type: artifact.ContainsRelationship, + From: id("1"), + To: id("2"), + Type: "1", } r2 := artifact.Relationship{ - From: fakeIdentifiable{"2"}, - To: fakeIdentifiable{"3"}, - Type: artifact.ContainsRelationship, + From: id("2"), + To: id("3"), + Type: "1", } r3 := artifact.Relationship{ - From: fakeIdentifiable{"3"}, - To: fakeIdentifiable{"4"}, - Type: artifact.ContainsRelationship, + From: id("3"), + To: id("4"), + Type: "1", } r4 := artifact.Relationship{ - From: fakeIdentifiable{"1"}, - To: fakeIdentifiable{"2"}, - Type: artifact.DependencyOfRelationship, + From: id("1"), + To: id("2"), + Type: "2", } r5 := artifact.Relationship{ - From: fakeIdentifiable{"2"}, - To: fakeIdentifiable{"3"}, - Type: artifact.DependencyOfRelationship, + From: id("2"), + To: id("3"), + Type: "2", } dup := artifact.Relationship{ - From: fakeIdentifiable{"2"}, - To: fakeIdentifiable{"3"}, - Type: artifact.DependencyOfRelationship, + From: id("2"), + To: id("3"), + Type: "2", + } + r6 := artifact.Relationship{ + From: id("2"), + To: id("3"), + Type: "3", } - // should have a stable sort order when retrieving elements - idx := NewIndex(r1, r2, r3, r4, r5, dup) - require.ElementsMatch(t, slice(r3, r4, r5, r2, r1), idx.All()) + idx := NewIndex(r5, r2, r6, r4, r1, r3, dup) + require.EqualValues(t, slice(r1, r2, r3, r4, r5, r6), idx.All()) + + require.EqualValues(t, slice(r1, r4), idx.From(id("1"))) + + require.EqualValues(t, slice(r2, r5, r6), idx.To(id("3"))) + + rLast := artifact.Relationship{ + From: id("0"), + To: id("3"), + Type: "9999", + } - require.ElementsMatch(t, slice(r4, r1), idx.From(fakeIdentifiable{"1"})) + rFirst := artifact.Relationship{ + From: id("0"), + To: id("3"), + Type: "1", + } + + rMid := artifact.Relationship{ + From: id("0"), + To: id("1"), + Type: "2", + } + + idx.Add(rLast, rFirst, rMid) + + require.EqualValues(t, slice(rFirst, r1, r2, r3, rMid, r4, r5, r6, rLast), idx.All()) + + require.EqualValues(t, slice(rFirst, r2, r5, r6, rLast), idx.To(id("3"))) } func Test_Coordinates(t *testing.T) { @@ -131,6 +161,12 @@ func Test_Coordinates(t *testing.T) { c2 := file.Coordinates{ RealPath: "/coords/2", } + c3 := file.Coordinates{ + RealPath: "/coords/3", + } + c4 := file.Coordinates{ + RealPath: "/coords/4", + } for _, p := range []*pkg.Package{&p1, &p2, &p3} { p.SetID() @@ -166,8 +202,18 @@ func Test_Coordinates(t *testing.T) { To: c2, Type: artifact.ContainsRelationship, } + r7 := artifact.Relationship{ + From: c1, + To: c3, + Type: artifact.ContainsRelationship, + } + r8 := artifact.Relationship{ + From: c3, + To: c4, + Type: artifact.ContainsRelationship, + } - idx := NewIndex(r1, r2, r3, r4, r5, r6) + idx := NewIndex(r1, r2, r3, r4, r5, r6, r7, r8) got := idx.Coordinates(p1) require.ElementsMatch(t, slice(c1), got) @@ -176,12 +222,10 @@ func Test_Coordinates(t *testing.T) { require.ElementsMatch(t, slice(c1, c2), got) } -type fakeIdentifiable struct { - value string -} +type id string -func (i fakeIdentifiable) ID() artifact.ID { - return artifact.ID(i.value) +func (i id) ID() artifact.ID { + return artifact.ID(i) } func slice[T any](values ...T) []T {