Skip to content

Commit

Permalink
fix: SPDX output performance with many relationships (#3053)
Browse files Browse the repository at this point in the history
  • Loading branch information
kzantow authored Jul 24, 2024
1 parent 9573f55 commit 741c8fb
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 284 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"github.com/stretchr/testify/require"

"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/source"
)
Expand Down Expand Up @@ -44,12 +45,13 @@ func TestBinaryElfRelationships(t *testing.T) {
}
}

relationshipIndex := relationship.NewIndex(sbom.Relationships...)
for name, expectedDepNames := range expectedGraph {
pkgId := nameToId[name]
p := sbom.Artifacts.Packages.Package(pkgId)
require.NotNil(t, p, "expected package %q to be present in the SBOM", name)

rels := sbom.RelationshipsForPackage(*p, artifact.DependencyOfRelationship)
rels := relationshipIndex.References(*p, artifact.DependencyOfRelationship)
require.NotEmpty(t, rels, "expected package %q to have relationships", name)

toIds := map[artifact.ID]struct{}{}
Expand Down
35 changes: 15 additions & 20 deletions internal/relationship/binary/binary_dependencies.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,22 @@ func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Access

// 3. craft package-to-package relationships for each binary that represent shared library dependencies
//note: we only care about package-to-package relationships
var relIndex *relationship.Index
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
relIndex = relationship.NewIndex(s.Relationships...)
})

return generateRelationships(resolver, accessor, index, relIndex)
return generateRelationships(resolver, accessor, index)
}

func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationship.Index) []artifact.Relationship {
// read all existing dependencyOf relationships
func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex) []artifact.Relationship {
newRelationships := relationship.NewIndex()

// find all package-to-package relationships for shared library dependencies
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if r.Type != artifact.DependencyOfRelationship {
continue
relIndex := relationship.NewIndex(s.Relationships...)

addRelationship := func(r artifact.Relationship) {
if !relIndex.Contains(r) {
newRelationships.Add(r)
}
relIndex.Track(r)
}
})

// find all package-to-package relationships for shared library dependencies
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
for _, parentPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
for _, evidentLocation := range parentPkg.Locations.ToSlice() {
if evidentLocation.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation {
Expand All @@ -54,12 +49,12 @@ func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, i
continue
}

populateRelationships(exec, parentPkg, resolver, relIndex, index)
populateRelationships(exec, parentPkg, resolver, addRelationship, index)
}
}
})

return relIndex.NewRelationships()
return newRelationships.All()
}

// PackagesToRemove returns a list of binary packages (resolved by the ELF cataloger) that should be removed from the SBOM
Expand Down Expand Up @@ -147,7 +142,7 @@ func getBinaryPackagesToDelete(resolver file.Resolver, s *sbom.SBOM) []artifact.
return pkgsToDelete
}

func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationship.Index, index *sharedLibraryIndex) {
func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, addRelationship func(artifact.Relationship), index *sharedLibraryIndex) {
for _, libReference := range exec.ImportedLibraries {
// for each library reference, check s.Artifacts.Packages.Sorted(pkg.BinaryPkg) for a binary package that represents that library
// if found, create a relationship between the parent package and the library package
Expand All @@ -167,7 +162,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
realBaseName := path.Base(loc.RealPath)
pkgCollection := index.owningLibraryPackage(realBaseName)
if pkgCollection.PackageCount() < 1 {
relIndex.Add(
addRelationship(
artifact.Relationship{
From: loc.Coordinates,
To: parentPkg,
Expand All @@ -176,7 +171,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
)
}
for _, p := range pkgCollection.Sorted() {
relIndex.Add(
addRelationship(
artifact.Relationship{
From: p,
To: parentPkg,
Expand Down
16 changes: 15 additions & 1 deletion internal/relationship/binary/binary_dependencies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package binary

import (
"path"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -328,7 +329,20 @@ func relationshipComparer(x, y []artifact.Relationship) string {
artifact.Relationship{},
file.LocationSet{},
pkg.LicenseSet{},
))
), cmpopts.SortSlices(lessRelationships))
}

// lessRelationships reports whether r1 orders before r2. Relationships are
// compared by type first, then by the ID of the From side, then by the ID of
// the To side, giving the comparer a stable order to sort slices with.
func lessRelationships(r1, r2 artifact.Relationship) bool {
	if byType := strings.Compare(string(r1.Type), string(r2.Type)); byType != 0 {
		return byType < 0
	}
	if byFrom := strings.Compare(string(r1.From.ID()), string(r2.From.ID())); byFrom != 0 {
		return byFrom < 0
	}
	return strings.Compare(string(r1.To.ID()), string(r2.To.ID())) < 0
}

func newAccessor(pkgs []pkg.Package, coordinateIndex map[file.Coordinates]file.Executable, preexistingRelationships []artifact.Relationship) sbomsync.Accessor {
Expand Down
193 changes: 143 additions & 50 deletions internal/relationship/index.go
Original file line number Diff line number Diff line change
@@ -1,88 +1,181 @@
package relationship

import (
"github.com/scylladb/go-set/strset"
"slices"
"strings"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
)

// Index indexes relationships, preventing duplicates
type Index struct {
typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
existing []artifact.Relationship
additional []artifact.Relationship
all []*sortableRelationship
fromID map[artifact.ID]*mappedRelationships
toID map[artifact.ID]*mappedRelationships
}

func NewIndex(existing ...artifact.Relationship) *Index {
r := &Index{
typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
}
r.TrackAll(existing...)
return r
// NewIndex builds an Index pre-populated with the given relationships.
// Population goes through Add, so duplicates among the inputs are dropped.
func NewIndex(relationships ...artifact.Relationship) *Index {
	idx := new(Index)
	idx.Add(relationships...)
	return idx
}

func (i *Index) track(r artifact.Relationship) bool {
fromID := r.From.ID()
if _, ok := i.typesByFromTo[fromID]; !ok {
i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set)
// Add adds all the given relationships to the index, without adding duplicates
func (i *Index) Add(relationships ...artifact.Relationship) {
if i.fromID == nil {
i.fromID = map[artifact.ID]*mappedRelationships{}
}

toID := r.To.ID()
if _, ok := i.typesByFromTo[fromID][toID]; !ok {
i.typesByFromTo[fromID][toID] = strset.New()
if i.toID == nil {
i.toID = map[artifact.ID]*mappedRelationships{}
}

var exists bool
if i.typesByFromTo[fromID][toID].Has(string(r.Type)) {
exists = true
// store appropriate indexes for stable ordering to minimize ID() calls
for _, r := range relationships {
// prevent duplicates
if i.Contains(r) {
continue
}

fromID := r.From.ID()
toID := r.To.ID()

relationship := &sortableRelationship{
from: fromID,
to: toID,
relationship: r,
}

// add to all relationships
i.all = append(i.all, relationship)

// add from -> to mapping
mapped := i.fromID[fromID]
if mapped == nil {
mapped = &mappedRelationships{}
i.fromID[fromID] = mapped
}
mapped.add(toID, relationship)

// add to -> from mapping
mapped = i.toID[toID]
if mapped == nil {
mapped = &mappedRelationships{}
i.toID[toID] = mapped
}
mapped.add(fromID, relationship)
}
}

i.typesByFromTo[fromID][toID].Add(string(r.Type))
// From returns the sorted relationships whose From side is the given
// identifiable. When types are provided only relationships of those types are
// returned; with no types, every relationship type is included.
func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	outgoing := fromMapped(i.fromID, identifiable)
	return toSortedSlice(outgoing, types)
}

// To returns the sorted relationships whose To side is the given
// identifiable. When types are provided only relationships of those types are
// returned; with no types, every relationship type is included.
func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	incoming := fromMapped(i.toID, identifiable)
	return toSortedSlice(incoming, types)
}

return !exists
// References returns the sorted relationships that have the given identifiable
// on either side (From or To). When types are provided only relationships of
// those types are returned; with no types, every relationship type is included.
//
// A relationship that has the identifiable on both sides is listed in both the
// from and to mappings and therefore appears twice in the result.
func (i *Index) References(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	from := fromMapped(i.fromID, identifiable)
	to := fromMapped(i.toID, identifiable)

	// Build the combined list in a fresh slice. Appending directly onto the
	// slice returned by fromMapped would reuse the index's own backing array
	// whenever it has spare capacity, and the in-place sort performed by
	// toSortedSlice would then shuffle "to" relationships into the stored
	// "from" list, corrupting later From/References lookups.
	combined := make([]*sortableRelationship, 0, len(from)+len(to))
	combined = append(combined, from...)
	combined = append(combined, to...)

	return toSortedSlice(combined, types)
}

// Track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added).
// returns true if the relationship is new to the index, false otherwise.
func (i *Index) Track(r artifact.Relationship) bool {
unique := i.track(r)
if unique {
i.existing = append(i.existing, r)
// Coordinates returns all coordinates for the provided identifiable for provided relationship types
// If no types are provided, all relationship types are considered.
func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates {
var coordinates []file.Coordinates
for _, relationship := range i.References(identifiable, types...) {
cords := extractCoordinates(relationship)
coordinates = append(coordinates, cords...)
}
return unique
return coordinates
}

// Add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate).
func (i *Index) Add(r artifact.Relationship) bool {
if i.track(r) {
i.additional = append(i.additional, r)
return true
// Contains reports whether a relationship with the same From ID, relationship
// type, and To ID has already been added to this index.
func (i *Index) Contains(r artifact.Relationship) bool {
	mapped := i.fromID[r.From.ID()]
	if mapped == nil {
		// nothing has been recorded from this ID
		return false
	}

	byTarget := mapped.typeMap[r.Type]
	if byTarget == nil {
		// nothing of this type has been recorded from this ID
		return false
	}

	return byTarget[r.To.ID()] != nil
}

func (i *Index) TrackAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Track(r)
// All returns every indexed relationship in sorted order. When types are
// provided, only relationships whose type is one of them are returned; with no
// types, all relationships are returned.
func (i *Index) All(types ...artifact.RelationshipType) []artifact.Relationship {
	everything := i.all
	return toSortedSlice(everything, types)
}

// fromMapped looks up the relationships recorded in idMap under the ID of the
// given identifiable. It returns nil when either argument is nil or when the
// ID has no entry. The returned slice is the entry's own allRelated slice, not
// a copy.
func fromMapped(idMap map[artifact.ID]*mappedRelationships, identifiable artifact.Identifiable) []*sortableRelationship {
	if idMap == nil || identifiable == nil {
		return nil
	}
	if entry := idMap[identifiable.ID()]; entry != nil {
		return entry.allRelated
	}
	return nil
}

func (i *Index) AddAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Add(r)
// toSortedSlice sorts the given relationships in place (using sortFunc) and
// returns the underlying artifact.Relationship values in that order. When
// types is non-empty, only relationships whose type appears in types are kept.
// The result is nil when nothing matches.
//
// Note that the sort reorders the caller's slice, not a copy.
func toSortedSlice(relationships []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship {
	// output is always sorted so generated SBOMs are stable
	slices.SortFunc(relationships, sortFunc)

	keepAll := len(types) == 0

	var sorted []artifact.Relationship
	for idx := range relationships {
		rel := relationships[idx].relationship
		if !keepAll && !slices.Contains(types, rel.Type) {
			continue
		}
		sorted = append(sorted, rel)
	}
	return sorted
}

func (i *Index) NewRelationships() []artifact.Relationship {
return i.additional
// extractCoordinates returns the file.Coordinates found on either side of the
// relationship: the From side first, then the To side. Sides that are not
// file.Coordinates are ignored, so the result holds zero, one, or two entries.
func extractCoordinates(relationship artifact.Relationship) (results []file.Coordinates) {
	collect := func(side any) {
		if coords, ok := side.(file.Coordinates); ok {
			results = append(results, coords)
		}
	}

	collect(relationship.From)
	collect(relationship.To)

	return results
}

// mappedRelationships holds the relationships recorded for a single artifact ID
// on one side of the Index (the Index keeps one of these per ID in both its
// fromID and toID maps).
type mappedRelationships struct {
// typeMap looks up a relationship by its type and then by the ID passed to add
// (the ID on the other end of the relationship).
typeMap map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship
// allRelated lists every relationship passed to add. Query helpers hand this
// slice to an in-place sort, so its order is not guaranteed to be insertion order.
allRelated []*sortableRelationship
}

// add records newRelationship under the given id, which is the ID on the other
// end of the relationship. The relationship is appended to allRelated and
// stored in typeMap under its relationship type and id, replacing any entry
// already present for that (type, id) pair. Maps are created lazily.
func (m *mappedRelationships) add(id artifact.ID, newRelationship *sortableRelationship) {
	relType := newRelationship.relationship.Type

	if m.typeMap == nil {
		m.typeMap = make(map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship)
	}

	byID := m.typeMap[relType]
	if byID == nil {
		byID = make(map[artifact.ID]*sortableRelationship)
		m.typeMap[relType] = byID
	}
	byID[id] = newRelationship

	m.allRelated = append(m.allRelated, newRelationship)
}

func (i *Index) ExistingRelationships() []artifact.Relationship {
return i.existing
// sortableRelationship pairs a relationship with the IDs of its two ends, so
// sorting can compare the stored IDs instead of calling ID() repeatedly.
type sortableRelationship struct {
// from is the ID of relationship.From, captured when the relationship was indexed.
from artifact.ID
// to is the ID of relationship.To, captured when the relationship was indexed.
to artifact.ID
// relationship is the original relationship value.
relationship artifact.Relationship
}

func (i *Index) AllUniqueRelationships() []artifact.Relationship {
var all []artifact.Relationship
all = append(all, i.existing...)
all = append(all, i.additional...)
return all
// sortFunc is the three-way comparison used to order relationships: by
// relationship type first, then by the stored from ID, then by the stored to
// ID. It returns a negative value when a sorts before b, zero when they are
// equal on all three keys, and a positive value otherwise.
func sortFunc(a, b *sortableRelationship) int {
	if byType := strings.Compare(string(a.relationship.Type), string(b.relationship.Type)); byType != 0 {
		return byType
	}
	if byFrom := strings.Compare(string(a.from), string(b.from)); byFrom != 0 {
		return byFrom
	}
	return strings.Compare(string(a.to), string(b.to))
}
Loading

0 comments on commit 741c8fb

Please sign in to comment.