Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: SPDX output performance with many relationships #3053

Merged
merged 5 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (

"github.com/stretchr/testify/require"

"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/source"
)
Expand Down Expand Up @@ -44,12 +45,13 @@ func TestBinaryElfRelationships(t *testing.T) {
}
}

relationshipIndex := relationship.NewIndex(sbom.Relationships...)
for name, expectedDepNames := range expectedGraph {
pkgId := nameToId[name]
p := sbom.Artifacts.Packages.Package(pkgId)
require.NotNil(t, p, "expected package %q to be present in the SBOM", name)

rels := sbom.RelationshipsForPackage(*p, artifact.DependencyOfRelationship)
rels := relationshipIndex.References(*p, artifact.DependencyOfRelationship)
require.NotEmpty(t, rels, "expected package %q to have relationships", name)

toIds := map[artifact.ID]struct{}{}
Expand Down
35 changes: 15 additions & 20 deletions internal/relationship/binary/binary_dependencies.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,22 @@ func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Access

// 3. craft package-to-package relationships for each binary that represent shared library dependencies
//note: we only care about package-to-package relationships
var relIndex *relationship.Index
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
relIndex = relationship.NewIndex(s.Relationships...)
})

return generateRelationships(resolver, accessor, index, relIndex)
return generateRelationships(resolver, accessor, index)
}

func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationship.Index) []artifact.Relationship {
// read all existing dependencyOf relationships
func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex) []artifact.Relationship {
newRelationships := relationship.NewIndex()

// find all package-to-package relationships for shared library dependencies
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if r.Type != artifact.DependencyOfRelationship {
continue
relIndex := relationship.NewIndex(s.Relationships...)

addRelationship := func(r artifact.Relationship) {
if !relIndex.Contains(r) {
newRelationships.Add(r)
}
relIndex.Track(r)
}
})

// find all package-to-package relationships for shared library dependencies
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
for _, parentPkg := range s.Artifacts.Packages.Sorted(pkg.BinaryPkg) {
for _, evidentLocation := range parentPkg.Locations.ToSlice() {
if evidentLocation.Annotations[pkg.EvidenceAnnotationKey] != pkg.PrimaryEvidenceAnnotation {
Expand All @@ -54,12 +49,12 @@ func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, i
continue
}

populateRelationships(exec, parentPkg, resolver, relIndex, index)
populateRelationships(exec, parentPkg, resolver, addRelationship, index)
}
}
})

return relIndex.NewRelationships()
return newRelationships.All()
}

// PackagesToRemove returns a list of binary packages (resolved by the ELF cataloger) that should be removed from the SBOM
Expand Down Expand Up @@ -147,7 +142,7 @@ func getBinaryPackagesToDelete(resolver file.Resolver, s *sbom.SBOM) []artifact.
return pkgsToDelete
}

func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationship.Index, index *sharedLibraryIndex) {
func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, addRelationship func(artifact.Relationship), index *sharedLibraryIndex) {
for _, libReference := range exec.ImportedLibraries {
// for each library reference, check s.Artifacts.Packages.Sorted(pkg.BinaryPkg) for a binary package that represents that library
// if found, create a relationship between the parent package and the library package
Expand All @@ -167,7 +162,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
realBaseName := path.Base(loc.RealPath)
pkgCollection := index.owningLibraryPackage(realBaseName)
if pkgCollection.PackageCount() < 1 {
relIndex.Add(
addRelationship(
artifact.Relationship{
From: loc.Coordinates,
To: parentPkg,
Expand All @@ -176,7 +171,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
)
}
for _, p := range pkgCollection.Sorted() {
relIndex.Add(
addRelationship(
artifact.Relationship{
From: p,
To: parentPkg,
Expand Down
16 changes: 15 additions & 1 deletion internal/relationship/binary/binary_dependencies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package binary

import (
"path"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -328,7 +329,20 @@ func relationshipComparer(x, y []artifact.Relationship) string {
artifact.Relationship{},
file.LocationSet{},
pkg.LicenseSet{},
))
), cmpopts.SortSlices(lessRelationships))
}

// lessRelationships reports whether r1 sorts before r2, comparing by relationship
// type first, then by the ID of From, and finally by the ID of To.
func lessRelationships(r1, r2 artifact.Relationship) bool {
	if byType := strings.Compare(string(r1.Type), string(r2.Type)); byType != 0 {
		return byType < 0
	}
	if byFrom := strings.Compare(string(r1.From.ID()), string(r2.From.ID())); byFrom != 0 {
		return byFrom < 0
	}
	return strings.Compare(string(r1.To.ID()), string(r2.To.ID())) < 0
}

func newAccessor(pkgs []pkg.Package, coordinateIndex map[file.Coordinates]file.Executable, preexistingRelationships []artifact.Relationship) sbomsync.Accessor {
Expand Down
193 changes: 143 additions & 50 deletions internal/relationship/index.go
Original file line number Diff line number Diff line change
@@ -1,88 +1,181 @@
package relationship

import (
"github.com/scylladb/go-set/strset"
"slices"
"strings"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
)

// Index indexes relationships, preventing duplicates
type Index struct {
typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
existing []artifact.Relationship
additional []artifact.Relationship
all []*sortableRelationship
fromID map[artifact.ID]*mappedRelationships
toID map[artifact.ID]*mappedRelationships
}

func NewIndex(existing ...artifact.Relationship) *Index {
r := &Index{
typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
}
r.TrackAll(existing...)
return r
// NewIndex returns a new relationship Index
func NewIndex(relationships ...artifact.Relationship) *Index {
out := Index{}
out.Add(relationships...)
return &out
}

func (i *Index) track(r artifact.Relationship) bool {
fromID := r.From.ID()
if _, ok := i.typesByFromTo[fromID]; !ok {
i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set)
// Add adds all the given relationships to the index, without adding duplicates
func (i *Index) Add(relationships ...artifact.Relationship) {
if i.fromID == nil {
i.fromID = map[artifact.ID]*mappedRelationships{}
}

toID := r.To.ID()
if _, ok := i.typesByFromTo[fromID][toID]; !ok {
i.typesByFromTo[fromID][toID] = strset.New()
if i.toID == nil {
i.toID = map[artifact.ID]*mappedRelationships{}
}

var exists bool
if i.typesByFromTo[fromID][toID].Has(string(r.Type)) {
exists = true
// store appropriate indexes for stable ordering to minimize ID() calls
for _, r := range relationships {
// prevent duplicates
if i.Contains(r) {
continue
}

fromID := r.From.ID()
toID := r.To.ID()

relationship := &sortableRelationship{
from: fromID,
to: toID,
relationship: r,
}

// add to all relationships
i.all = append(i.all, relationship)

// add from -> to mapping
mapped := i.fromID[fromID]
if mapped == nil {
mapped = &mappedRelationships{}
i.fromID[fromID] = mapped
}
mapped.add(toID, relationship)

// add to -> from mapping
mapped = i.toID[toID]
if mapped == nil {
mapped = &mappedRelationships{}
i.toID[toID] = mapped
}
mapped.add(fromID, relationship)
}
}

i.typesByFromTo[fromID][toID].Add(string(r.Type))
// From returns the relationships whose From side is the given identifiable,
// restricted to the given types (all types when none are given).
func (i *Index) From(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	outgoing := fromMapped(i.fromID, identifiable)
	return toSortedSlice(outgoing, types)
}

// To returns the relationships whose To side is the given identifiable,
// restricted to the given types (all types when none are given).
func (i *Index) To(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	incoming := fromMapped(i.toID, identifiable)
	return toSortedSlice(incoming, types)
}

return !exists
// References returns all relationships that reference the given identifiable on
// either side (from or to), restricted to the given types (all types when none
// are given). The result is sorted.
func (i *Index) References(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
	from := fromMapped(i.fromID, identifiable)
	to := fromMapped(i.toID, identifiable)

	// Combine into a fresh slice: fromMapped hands back the index's own backing
	// storage, and toSortedSlice sorts its argument in place. Appending directly
	// onto the "from" slice could reuse its spare capacity, letting the sort move
	// "to" entries into the stored "from" list and corrupt later lookups.
	combined := make([]*sortableRelationship, 0, len(from)+len(to))
	combined = append(combined, from...)
	combined = append(combined, to...)

	return toSortedSlice(combined, types)
}

// Track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added).
// returns true if the relationship is new to the index, false otherwise.
func (i *Index) Track(r artifact.Relationship) bool {
unique := i.track(r)
if unique {
i.existing = append(i.existing, r)
// Coordinates returns all coordinates for the provided identifiable for provided relationship types
// If no types are provided, all relationship types are considered.
func (i *Index) Coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates {
var coordinates []file.Coordinates
for _, relationship := range i.References(identifiable, types...) {
cords := extractCoordinates(relationship)
coordinates = append(coordinates, cords...)
}
return unique
return coordinates
}

// Add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate).
func (i *Index) Add(r artifact.Relationship) bool {
if i.track(r) {
i.additional = append(i.additional, r)
return true
// Contains reports whether a relationship with the same From ID, type, and To ID
// as r has already been recorded in this index.
func (i *Index) Contains(r artifact.Relationship) bool {
	mapped := i.fromID[r.From.ID()]
	if mapped == nil {
		return false
	}

	byTarget := mapped.typeMap[r.Type]
	if byTarget == nil {
		return false
	}

	return byTarget[r.To.ID()] != nil
}

func (i *Index) TrackAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Track(r)
// All returns every relationship in the index in sorted order. When types are
// given, only relationships whose type is one of them are returned.
func (i *Index) All(types ...artifact.RelationshipType) []artifact.Relationship {
	everything := i.all
	return toSortedSlice(everything, types)
}

// fromMapped looks up the relationships recorded in idMap under the ID of the
// given identifiable. It returns nil when the map or the identifiable is nil, or
// when nothing is recorded for that ID. The returned slice is the stored slice
// itself, not a copy.
func fromMapped(idMap map[artifact.ID]*mappedRelationships, identifiable artifact.Identifiable) []*sortableRelationship {
	if idMap == nil || identifiable == nil {
		return nil
	}
	if entry := idMap[identifiable.ID()]; entry != nil {
		return entry.allRelated
	}
	return nil
}

func (i *Index) AddAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Add(r)
// toSortedSlice sorts the given relationships in place (using sortFunc) and then
// returns the underlying artifact.Relationship values whose type is one of the
// given types. An empty types list selects every relationship. The result is nil
// when nothing matches.
func toSortedSlice(relationships []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship {
	// always return sorted for SBOM stability
	slices.SortFunc(relationships, sortFunc)

	acceptAll := len(types) == 0

	var out []artifact.Relationship
	for idx := range relationships {
		rel := relationships[idx].relationship
		if !acceptAll && !slices.Contains(types, rel.Type) {
			continue
		}
		out = append(out, rel)
	}
	return out
}

func (i *Index) NewRelationships() []artifact.Relationship {
return i.additional
// extractCoordinates returns the From and To sides of the relationship (in that
// order) that are file.Coordinates values; sides of any other type are skipped.
func extractCoordinates(relationship artifact.Relationship) (results []file.Coordinates) {
	for _, endpoint := range []any{relationship.From, relationship.To} {
		if coords, ok := endpoint.(file.Coordinates); ok {
			results = append(results, coords)
		}
	}
	return results
}

// mappedRelationships collects the relationships recorded under a single ID,
// both as a flat list and as a lookup keyed by relationship type and then by the
// ID supplied to add.
type mappedRelationships struct {
// typeMap maps relationship type -> ID passed to add -> relationship; lazily created by add
typeMap map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship
// allRelated holds every relationship passed to add
allRelated []*sortableRelationship
}

// add records newRelationship in the flat list and in the per-type lookup under
// the given id, creating the lookup maps on first use. An existing entry for the
// same type and id is overwritten in the lookup.
func (m *mappedRelationships) add(id artifact.ID, newRelationship *sortableRelationship) {
	m.allRelated = append(m.allRelated, newRelationship)

	relType := newRelationship.relationship.Type

	if m.typeMap == nil {
		m.typeMap = make(map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship)
	}

	byID := m.typeMap[relType]
	if byID == nil {
		byID = make(map[artifact.ID]*sortableRelationship)
		m.typeMap[relType] = byID
	}
	byID[id] = newRelationship
}

func (i *Index) ExistingRelationships() []artifact.Relationship {
return i.existing
// sortableRelationship pairs a relationship with separately stored from/to IDs,
// which sortFunc compares instead of calling ID() on the relationship's sides.
type sortableRelationship struct {
// from is the ID stored for the relationship's From side
from artifact.ID
// to is the ID stored for the relationship's To side
to artifact.ID
// relationship is the original relationship value
relationship artifact.Relationship
}

func (i *Index) AllUniqueRelationships() []artifact.Relationship {
var all []artifact.Relationship
all = append(all, i.existing...)
all = append(all, i.additional...)
return all
// sortFunc is a three-way comparison for sortableRelationship values: it orders
// by relationship type, then by the stored from ID, then by the stored to ID.
// It returns a negative number, zero, or a positive number accordingly.
func sortFunc(a, b *sortableRelationship) int {
	if byType := strings.Compare(string(a.relationship.Type), string(b.relationship.Type)); byType != 0 {
		return byType
	}
	if byFrom := strings.Compare(string(a.from), string(b.from)); byFrom != 0 {
		return byFrom
	}
	return strings.Compare(string(a.to), string(b.to))
}
Loading
Loading