Skip to content

Commit

Permalink
[OSV SCALIBR] Performance optimization in the trace package to redu…
Browse files Browse the repository at this point in the history
…ce extractor calls when tracing origin layer. Previously, looping through the inventory would result in an `Extract` call. Now, the inventory results are cached via a local map.

PiperOrigin-RevId: 708194537
  • Loading branch information
Mario Leyva authored and copybara-github committed Dec 20, 2024
1 parent fd6877f commit 1961308
Showing 1 changed file with 33 additions and 9 deletions.
42 changes: 33 additions & 9 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ import (
scalibrfs "github.com/google/osv-scalibr/fs"
)

// locationAndIndex pairs a file location with the index of a chain layer. It is used as the key
// of the inventory cache in PopulateLayerDetails, so that the inventory extracted from a given
// location at a given layer can be reused instead of being extracted again.
type locationAndIndex struct {
// location is the file location the inventory was found at (PopulateLayerDetails uses the
// first entry of the inventory's Locations).
location string
// index is the position of the chain layer within the chainLayers slice.
index int
}

// PopulateLayerDetails populates the LayerDetails field of the inventory with the origin details
// obtained by tracing the inventory in the image.
//
Expand All @@ -48,6 +54,7 @@ import (
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
layerDetailsList := []*extractor.LayerDetails{}

// Create the list of layer details structs to be referenced by the inventory.
for i, chainLayer := range chainLayers {
layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{
Index: i,
Expand All @@ -57,6 +64,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
})
}

// Helper function to update the extractor config.
updateExtractorConfig := func(filesToExtract []string, extractor filesystem.Extractor, chainFS scalibrfs.FS) {
config.Extractors = []filesystem.Extractor{extractor}
config.FilesToExtract = filesToExtract
Expand All @@ -67,11 +75,13 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}
}

for _, inv := range inventory {
lastChainLayer := chainLayers[len(chainLayers)-1]
layerIndex := lastChainLayer.Index()
layerDetails := layerDetailsList[layerIndex]
// locationIndexToInventory is used as an inventory cache to avoid re-extracting the same
// inventory from a file multiple times.
locationIndexToInventory := map[locationAndIndex][]*extractor.Inventory{}
lastLayerIndex := len(chainLayers) - 1

for _, inv := range inventory {
layerDetails := layerDetailsList[lastLayerIndex]
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)

// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
Expand All @@ -90,12 +100,26 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
for i := len(chainLayers) - 2; i >= 0; i-- {
oldChainLayer := chainLayers[i]

// Update the extractor config to use the files from the current layer.
updateExtractorConfig(inv.Locations, invExtractor, oldChainLayer.FS())
invLocationAndIndex := locationAndIndex{
location: inv.Locations[0],
index: i,
}

var oldInventory []*extractor.Inventory
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
oldInventory = cachedInventory
} else {
// Update the extractor config to use the files from the current layer.
updateExtractorConfig(inv.Locations, invExtractor, oldChainLayer.FS())

oldInventory, _, err := filesystem.Run(ctx, config)
if err != nil {
break
var err error
oldInventory, _, err = filesystem.Run(ctx, config)
if err != nil {
break
}

// Cache the inventory for future use.
locationIndexToInventory[invLocationAndIndex] = oldInventory
}

foundPackage := false
Expand Down

0 comments on commit 1961308

Please sign in to comment.