Skip to content

Commit

Permalink
[OSV SCALIBR] Performance optimization in the trace package to reduce extractor calls when tracing origin layer.
Browse files Browse the repository at this point in the history
Previously, every inventory item visited while looping through the inventory triggered an `Extract` call. Now, the extraction results are cached in a local map and reused.

PiperOrigin-RevId: 710624564
  • Loading branch information
Mario Leyva authored and copybara-github committed Dec 30, 2024
1 parent fd6877f commit 5f1a98f
Showing 1 changed file with 37 additions and 13 deletions.
50 changes: 37 additions & 13 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ import (
scalibrfs "github.com/google/osv-scalibr/fs"
)

// locationAndIndex is a struct to represent a location and the index of the layer it was found in.
//
// Both fields are comparable, so the struct can be used directly as a map key; PopulateLayerDetails
// uses it to key its cache of previously extracted inventory.
type locationAndIndex struct {
// location is the file path part of the key. PopulateLayerDetails fills it with the first entry
// of an inventory's Locations slice.
location string
// index is the position of the chain layer that was (or is about to be) scanned for location.
index int
}

// PopulateLayerDetails populates the LayerDetails field of the inventory with the origin details
// obtained by tracing the inventory in the image.
//
Expand All @@ -46,17 +52,19 @@ import (
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
layerDetailsList := []*extractor.LayerDetails{}
chainLayerDetailsList := []*extractor.LayerDetails{}

// Create list of layer details struct to be referenced by inventory.
for i, chainLayer := range chainLayers {
layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{
chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{
Index: i,
DiffID: chainLayer.Layer().DiffID(),
Command: chainLayer.Layer().Command(),
InBaseImage: false,
})
}

// Helper function to update the extractor config.
updateExtractorConfig := func(filesToExtract []string, extractor filesystem.Extractor, chainFS scalibrfs.FS) {
config.Extractors = []filesystem.Extractor{extractor}
config.FilesToExtract = filesToExtract
Expand All @@ -67,11 +75,13 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}
}

for _, inv := range inventory {
lastChainLayer := chainLayers[len(chainLayers)-1]
layerIndex := lastChainLayer.Index()
layerDetails := layerDetailsList[layerIndex]
// locationIndexToInventory is used as an inventory cache to avoid re-extracting the same
// inventory from a file multiple times.
locationIndexToInventory := map[locationAndIndex][]*extractor.Inventory{}
lastLayerIndex := len(chainLayers) - 1

for _, inv := range inventory {
layerDetails := chainLayerDetailsList[lastLayerIndex]
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)

// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
Expand All @@ -90,12 +100,26 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
for i := len(chainLayers) - 2; i >= 0; i-- {
oldChainLayer := chainLayers[i]

// Update the extractor config to use the files from the current layer.
updateExtractorConfig(inv.Locations, invExtractor, oldChainLayer.FS())
invLocationAndIndex := locationAndIndex{
location: inv.Locations[0],
index: i,
}

var oldInventory []*extractor.Inventory
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
oldInventory = cachedInventory
} else {
// Update the extractor config to use the files from the current layer.
updateExtractorConfig(inv.Locations, invExtractor, oldChainLayer.FS())

oldInventory, _, err := filesystem.Run(ctx, config)
if err != nil {
break
var err error
oldInventory, _, err = filesystem.Run(ctx, config)
if err != nil {
break
}

// Cache the inventory for future use.
locationIndexToInventory[invLocationAndIndex] = oldInventory
}

foundPackage := false
Expand All @@ -108,7 +132,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,

// If the inventory is not present in the old layer, then it was introduced in layer i+1.
if !foundPackage {
layerDetails = layerDetailsList[i+1]
layerDetails = chainLayerDetailsList[i+1]
foundOrigin = true
break
}
Expand All @@ -117,7 +141,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
// If the inventory is present in every layer, then it means it was introduced in the first
// layer.
if !foundOrigin {
layerDetails = layerDetailsList[0]
layerDetails = chainLayerDetailsList[0]
}
inv.LayerDetails = layerDetails
}
Expand Down

0 comments on commit 5f1a98f

Please sign in to comment.