Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new Path filter function to index paths based on the filter #151

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ca742f2
skip paths not in catalogers glob patterns
gnmahanth Jan 6, 2023
30a1c6d
fix missing java and binary files from sbom
gnmahanth Jan 9, 2023
ee0c971
modify code to take path filter func as parameter instead of all paths
gnmahanth Jan 10, 2023
08e5b9c
fix lint issue
gnmahanth Jan 11, 2023
178bc5c
update go to 1.19 (#155)
bradleyjones Feb 3, 2023
61a4559
Add additional catalog indexes for performance (#154)
wagoodman Feb 8, 2023
38cc5e5
Bump github.com/containerd/containerd from 1.6.12 to 1.6.18 (#156)
dependabot[bot] Feb 16, 2023
bd07bb5
fix: bump golang.org/x/net to v0.7.0 (#157)
westonsteimel Feb 21, 2023
66b650a
Fix link cycle detection (#158)
wagoodman Feb 21, 2023
c032eae
Link Detection Stack Depth FailSafe (#159)
spiffcs Feb 22, 2023
dbb5dc0
fix: thread safety for progress (#162)
kzantow Mar 1, 2023
9425f2a
chore: fix typo (#163)
KushalBeniwal Mar 17, 2023
7aad86f
Add ability to explicitly specify an Authenticator as well as Keychai…
vaikas Mar 23, 2023
19345b8
fix directory leak leading to out of disk (#161)
Mar 23, 2023
b7456e6
Set the default platform for select sources based on host arch (#152)
wagoodman Mar 23, 2023
d9a9a51
Bump github.com/docker/docker (#167)
dependabot[bot] Apr 6, 2023
6c1941d
Preserve time and expose link strategy (#166)
wobito Apr 11, 2023
162a14e
Bump github.com/containerd/containerd from 1.6.18 to 1.7.0 (#168)
Amar-Babu Apr 12, 2023
abe6881
Add format make target (#170)
wagoodman Apr 12, 2023
3ab9510
refactor: embed fs.FileInfo within file.Metadata (#172)
tri-adam May 8, 2023
73cb649
Specify platform in integ test images (#181)
willmurphyscode May 22, 2023
ad7eb21
Bump github.com/docker/distribution (#178)
dependabot[bot] May 22, 2023
7f87e8b
Change platform selection logic (#189)
willmurphyscode Jun 9, 2023
dde2147
Merge branch 'anchore:main' into main
gnmahanth Jun 15, 2023
b9ab08f
Merge branch 'main' into path-filter
gnmahanth Jun 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func WithPlatform(platform string) Option {
}

// GetImageFromSource returns an image from the explicitly provided source.
func GetImageFromSource(ctx context.Context, imgStr string, source image.Source, options ...Option) (*image.Image, error) {
func GetImageFromSource(ctx context.Context, imgStr string, source image.Source, filter image.PathFilter, options ...Option) (*image.Image, error) {
log.Debugf("image: source=%+v location=%+v", source, imgStr)

var cfg config
Expand All @@ -91,7 +91,7 @@ func GetImageFromSource(ctx context.Context, imgStr string, source image.Source,
return nil, fmt.Errorf("unable to use %s source: %w", source, err)
}

err = img.Read()
err = img.Read(filter)
if err != nil {
return nil, fmt.Errorf("could not read image: %+v", err)
}
Expand Down Expand Up @@ -170,12 +170,12 @@ func defaultPlatformIfNil(cfg *config) {

// GetImage parses the user provided image string and provides an image object;
// note: the source where the image should be referenced from is automatically inferred.
func GetImage(ctx context.Context, userStr string, options ...Option) (*image.Image, error) {
func GetImage(ctx context.Context, userStr string, filter image.PathFilter, options ...Option) (*image.Image, error) {
source, imgStr, err := image.DetectSource(userStr)
if err != nil {
return nil, err
}
return GetImageFromSource(ctx, imgStr, source, options...)
return GetImageFromSource(ctx, imgStr, source, filter, options...)
}

func SetLogger(logger logger.Logger) {
Expand Down
3 changes: 2 additions & 1 deletion examples/basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ func main() {
// ./path/to.tar
//
// This will catalog the file metadata and resolve all squash trees
image, err := stereoscope.GetImage(ctx, os.Args[1])
filter := func(path string) bool { return true }
image, err := stereoscope.GetImage(ctx, os.Args[1], filter)
if err != nil {
panic(err)
}
Expand Down
10 changes: 5 additions & 5 deletions pkg/image/file_catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func TestFileCatalog_GetByExtension(t *testing.T) {
// we don't need the index itself, just the side effect on the file catalog after indexing
_, err := file.NewTarIndex(
fixtureTarFile.Name(),
layerTarIndexer(ft, fileCatalog, &size, nil, nil),
layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
)
require.NoError(t, err)

Expand Down Expand Up @@ -389,7 +389,7 @@ func TestFileCatalog_GetByBasename(t *testing.T) {
// we don't need the index itself, just the side effect on the file catalog after indexing
_, err := file.NewTarIndex(
fixtureTarFile.Name(),
layerTarIndexer(ft, fileCatalog, &size, nil, nil),
layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
)
require.NoError(t, err)

Expand Down Expand Up @@ -493,7 +493,7 @@ func TestFileCatalog_GetByBasenameGlob(t *testing.T) {
// we don't need the index itself, just the side effect on the file catalog after indexing
_, err := file.NewTarIndex(
fixtureTarFile.Name(),
layerTarIndexer(ft, fileCatalog, &size, nil, nil),
layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
)
require.NoError(t, err)

Expand Down Expand Up @@ -605,7 +605,7 @@ func TestFileCatalog_GetByMimeType(t *testing.T) {
// we don't need the index itself, just the side effect on the file catalog after indexing
_, err := file.NewTarIndex(
fixtureTarFile.Name(),
layerTarIndexer(ft, fileCatalog, &size, nil, nil),
layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
)
require.NoError(t, err)

Expand Down Expand Up @@ -706,7 +706,7 @@ func TestFileCatalog_GetBasenames(t *testing.T) {
// we don't need the index itself, just the side effect on the file catalog after indexing
_, err := file.NewTarIndex(
fixtureTarFile.Name(),
layerTarIndexer(ft, fileCatalog, &size, nil, nil),
layerTarIndexer(ft, fileCatalog, &size, nil, nil, nil),
)
require.NoError(t, err)

Expand Down
4 changes: 2 additions & 2 deletions pkg/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ func (i *Image) applyOverrideMetadata() error {

// Read parses information from the underlying image tar into this struct. This includes image metadata, layer
// metadata, layer file trees, and layer squash trees (which implies the image squash tree).
func (i *Image) Read() error {
func (i *Image) Read(filter PathFilter) error {
var layers = make([]*Layer, 0)
var err error
i.Metadata, err = readImageMetadata(i.image)
Expand Down Expand Up @@ -215,7 +215,7 @@ func (i *Image) Read() error {

for idx, v1Layer := range v1Layers {
layer := NewLayer(v1Layer)
err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir)
err := layer.Read(fileCatalog, i.Metadata, idx, i.contentCacheDir, filter)
if err != nil {
return err
}
Expand Down
23 changes: 17 additions & 6 deletions pkg/image/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (

const SingularitySquashFSLayer = "application/vnd.sylabs.sif.layer.v1.squashfs"

// PathFilter decides if a path has to be included in the index. It is called
// with the path of each entry visited while reading a layer; when it returns
// false the entry is skipped and not added to the file tree or file catalog.
// A filter that always returns true indexes every path.
//
// NOTE(review): the layer visitors appear to call the filter without a nil
// check, so callers presumably must pass a non-nil function — confirm.
type PathFilter = func(path string) bool

// Layer represents a single layer within a container image.
type Layer struct {
// layer is the raw layer metadata and content provider from the GCR lib
Expand Down Expand Up @@ -80,7 +83,7 @@ func (l *Layer) uncompressedTarCache(uncompressedLayersCacheDir string) (string,

// Read parses information from the underlying layer tar into this struct. This includes layer metadata, the layer
// file tree, and the layer squash tree.
func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string) error {
func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncompressedLayersCacheDir string, filter PathFilter) error {
var err error
tree := filetree.New()
l.Tree = tree
Expand Down Expand Up @@ -113,7 +116,7 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp

l.indexedContent, err = file.NewTarIndex(
tarFilePath,
layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor),
layerTarIndexer(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter),
)
if err != nil {
return fmt.Errorf("failed to read layer=%q tar : %w", l.Metadata.Digest, err)
Expand All @@ -128,9 +131,9 @@ func (l *Layer) Read(catalog *FileCatalog, imgMetadata Metadata, idx int, uncomp

// Walk the more efficient walk if we're blessed with an io.ReaderAt.
if ra, ok := r.(io.ReaderAt); ok {
err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor))
err = file.WalkSquashFS(ra, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter))
} else {
err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor))
err = file.WalkSquashFSFromReader(r, squashfsVisitor(tree, l.fileCatalog, &l.Metadata.Size, l, monitor, filter))
}
if err != nil {
return fmt.Errorf("failed to walk layer=%q: %w", l.Metadata.Digest, err)
Expand Down Expand Up @@ -205,7 +208,7 @@ func (l *Layer) FilesByMIMETypeFromSquash(mimeTypes ...string) ([]file.Reference
return refs, nil
}

func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.TarIndexVisitor {
func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual, filter PathFilter) file.TarIndexVisitor {
builder := filetree.NewBuilder(ft, fileCatalog.Index)

return func(index file.TarIndexEntry) error {
Expand All @@ -220,6 +223,10 @@ func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64,
}()
metadata := file.NewMetadata(entry.Header, contents)

if !filter(metadata.Path) {
return nil
}

// note: the tar header name is independent of surrounding structure, for example, there may be a tar header entry
// for /some/path/to/file.txt without any entries to constituent paths (/some, /some/path, /some/path/to ).
// This is ok, and the FileTree will account for this by automatically adding directories for non-existing
Expand Down Expand Up @@ -247,10 +254,14 @@ func layerTarIndexer(ft filetree.Writer, fileCatalog *FileCatalog, size *int64,
}
}

func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual) file.SquashFSVisitor {
func squashfsVisitor(ft filetree.Writer, fileCatalog *FileCatalog, size *int64, layerRef *Layer, monitor *progress.Manual, filter PathFilter) file.SquashFSVisitor {
builder := filetree.NewBuilder(ft, fileCatalog.Index)

return func(fsys fs.FS, path string, d fs.DirEntry) error {
if !filter(path) {
return nil
}

ff, err := fsys.Open(path)
if err != nil {
return err
Expand Down
4 changes: 3 additions & 1 deletion pkg/image/sif/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ func TestSingularityImageProvider_Provide(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
p := NewProviderFromPath(tt.path, file.NewTempDirGenerator(""))

filter := func(path string) bool { return true }

i, err := p.Provide(context.Background(), tt.userMetadata...)
t.Cleanup(func() { _ = i.Cleanup() })

Expand All @@ -45,7 +47,7 @@ func TestSingularityImageProvider_Provide(t *testing.T) {
}

if err == nil {
if err := i.Read(); err != nil {
if err := i.Read(filter); err != nil {
t.Fatal(err)
}
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/imagetest/image_fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ func PrepareFixtureImage(t testing.TB, source, name string) string {

func GetFixtureImage(t testing.TB, source, name string) *image.Image {
request := PrepareFixtureImage(t, source, name)
filter := func(path string) bool { return true }

i, err := stereoscope.GetImage(context.TODO(), request)
i, err := stereoscope.GetImage(context.TODO(), request, filter)
require.NoError(t, err)
t.Cleanup(func() {
require.NoError(t, i.Cleanup())
Expand Down Expand Up @@ -110,8 +111,9 @@ func skopeoCopyDockerArchiveToPath(t testing.TB, dockerArchivePath, destination

func getFixtureImageFromTar(t testing.TB, tarPath string) *image.Image {
request := fmt.Sprintf("docker-archive:%s", tarPath)
filter := func(path string) bool { return true }

i, err := stereoscope.GetImage(context.TODO(), request)
i, err := stereoscope.GetImage(context.TODO(), request, filter)
require.NoError(t, err)

t.Cleanup(func() {
Expand Down
3 changes: 2 additions & 1 deletion test/integration/fixture_image_simple_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,13 @@ func BenchmarkSimpleImage_GetImage(b *testing.B) {
continue
}
request := imagetest.PrepareFixtureImage(b, c.source, "image-simple")
filter := func(path string) bool { return true }

b.Run(c.source, func(b *testing.B) {
var bi *image.Image
for i := 0; i < b.N; i++ {

bi, err = stereoscope.GetImage(context.TODO(), request)
bi, err = stereoscope.GetImage(context.TODO(), request, filter)
b.Cleanup(func() {
require.NoError(b, bi.Cleanup())
})
Expand Down
3 changes: 2 additions & 1 deletion test/integration/mime_type_detection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ import (

func TestContentMIMETypeDetection(t *testing.T) {
request := imagetest.PrepareFixtureImage(t, "docker-archive", "image-simple")
filter := func(path string) bool { return true }

img, err := stereoscope.GetImage(context.TODO(), request)
img, err := stereoscope.GetImage(context.TODO(), request, filter)

assert.NoError(t, err)
t.Cleanup(stereoscope.Cleanup)
Expand Down
6 changes: 4 additions & 2 deletions test/integration/oci_registry_source_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ func TestOciRegistrySourceMetadata(t *testing.T) {
imgStr := "anchore/test_images"
ref := fmt.Sprintf("%s@%s", imgStr, digest)

img, err := stereoscope.GetImage(context.TODO(), "registry:"+ref)
filter := func(path string) bool { return true }

img, err := stereoscope.GetImage(context.TODO(), "registry:"+ref, filter)
require.NoError(t, err)
t.Cleanup(func() {
require.NoError(t, img.Cleanup())
})

require.NoError(t, img.Read())
require.NoError(t, img.Read(filter))

assert.Len(t, img.Metadata.RepoDigests, 1)
assert.Equal(t, "index.docker.io/"+ref, img.Metadata.RepoDigests[0])
Expand Down
9 changes: 6 additions & 3 deletions test/integration/platform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ func TestPlatformSelection(t *testing.T) {
tt.expectedErr = require.NoError
}
platformOpt := stereoscope.WithPlatform(platform)
img, err := stereoscope.GetImageFromSource(context.TODO(), imageName, tt.source, platformOpt)
filter := func(path string) bool { return true }
img, err := stereoscope.GetImageFromSource(context.TODO(), imageName, tt.source, filter, platformOpt)
tt.expectedErr(t, err)
require.NotNil(t, img)

Expand Down Expand Up @@ -113,17 +114,19 @@ func TestDigestThatNarrowsToOnePlatform(t *testing.T) {
source: image.OciRegistrySource,
},
}
filter := func(path string) bool { return true }
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
img, err := stereoscope.GetImageFromSource(context.TODO(), imageStrWithDigest, tt.source)
img, err := stereoscope.GetImageFromSource(context.TODO(), imageStrWithDigest, tt.source, filter)
assert.NoError(t, err)
assertArchAndOs(t, img, "linux", "s390x")
})
}
}

func TestDefaultPlatformWithOciRegistry(t *testing.T) {
img, err := stereoscope.GetImageFromSource(context.TODO(), "busybox:1.31", image.OciRegistrySource)
filter := func(path string) bool { return true }
img, err := stereoscope.GetImageFromSource(context.TODO(), "busybox:1.31", image.OciRegistrySource, filter)
require.NoError(t, err)
assertArchAndOs(t, img, "linux", runtime.GOARCH)
}
Expand Down