From b9cacf0eabdc71df29f09139b6949dc6de695881 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 20 Jan 2023 14:32:09 +0000 Subject: [PATCH 1/3] source: avoid second-step of symlink resolution in directory resolver We can use the already existing file tree to peform symlink resolution for FilesByPath, instead of traversing the symlinks again. This moves all of the symlink logic into the indexing code, and then we can rely on syft's resolution algorithm over the index in this part of the codebase. Signed-off-by: Justin Chadwell --- syft/source/directory_resolver.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index a35fe6842857..04e0fa73d5de 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -336,14 +336,17 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) } // we should be resolving symlinks and preserving this information as a VirtualPath to the real file - evaluatedPath, err := filepath.EvalSymlinks(userStrPath) + exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks) if err != nil { log.Debugf("directory resolver unable to evaluate symlink for path=%q : %+v", userPath, err) continue } + if !exists { + continue + } // TODO: why not use stored metadata? - fileMeta, err := os.Stat(evaluatedPath) + fileMeta, err := os.Stat(string(ref.RealPath)) if errors.Is(err, os.ErrNotExist) { // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform // specific, but essentially hints at the same overall problem (that the path does not exist). Such an @@ -354,7 +357,7 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) // invalid paths. This logging statement is meant to raise IO or permissions related problems. var pathErr *os.PathError if !errors.As(err, &pathErr) { - log.Warnf("path is not valid (%s): %+v", evaluatedPath, err) + log.Warnf("path is not valid (%s): %+v", ref.RealPath, err) } continue } @@ -368,15 +371,12 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) userStrPath = windowsToPosix(userStrPath) } - exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks) - if err == nil && exists { loc := NewVirtualLocationFromDirectory( r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root *ref, ) references = append(references, loc) - } } return references, nil From 533d60b690d051a5b7f7300a4cdaea929f15f6dc Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 20 Jan 2023 14:50:03 +0000 Subject: [PATCH 2/3] source: add base parameter to directory resolver The new base parameter is an optional parameter for the directory resolver that resolves all symlinks relative to this root. There are two intended use cases: - base = "/". The previous behavior, symlinks are resolved relative to the root filesystem. - base = path. Symlinks are resolved relative to the target filesystem, allowing correct behavior when scanning unpacked container filesystems on disk. Signed-off-by: Justin Chadwell --- syft/source/directory_resolver.go | 51 ++++++++++++++++++++------ syft/source/directory_resolver_test.go | 38 +++++++++---------- syft/source/metadata.go | 1 + syft/source/source.go | 25 ++++++++++++- syft/source/source_test.go | 2 +- 5 files changed, 85 insertions(+), 32 deletions(-) diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 04e0fa73d5de..582ba5acf977 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -37,6 +37,7 @@ type pathFilterFn func(string, os.FileInfo) bool // directoryResolver implements path and content access for the directory data source. type directoryResolver struct { path string + base string currentWdRelativeToRoot string currentWd string fileTree *filetree.FileTree @@ -47,7 +48,7 @@ type directoryResolver struct { errPaths map[string]error } -func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) { +func newDirectoryResolver(root string, base string, pathFilters ...pathFilterFn) (*directoryResolver, error) { currentWD, err := os.Getwd() if err != nil { return nil, fmt.Errorf("could not get CWD: %w", err) @@ -64,6 +65,18 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err) } + cleanBase := "" + if base != "" { + cleanBase, err = filepath.EvalSymlinks(base) + if err != nil { + return nil, fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err) + } + cleanBase, err = filepath.Abs(cleanBase) + if err != nil { + return nil, err + } + } + var currentWdRelRoot string if path.IsAbs(cleanRoot) { currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot) @@ -76,6 +89,7 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR resolver := directoryResolver{ path: cleanRoot, + base: cleanBase, currentWd: cleanCWD, currentWdRelativeToRoot: currentWdRelRoot, fileTree: filetree.NewFileTree(), @@ -244,10 +258,25 @@ func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) } - // note: if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to resolve it relative to the directory - // in question (e.g. resolve to /dev/fd/2) - if !filepath.IsAbs(linkTarget) { - linkTarget = filepath.Join(filepath.Dir(p), linkTarget) + if filepath.IsAbs(linkTarget) { + // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to + // resolve relative to the root of the base directory + linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget)) + } else { + // if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to + // resolve it relative to the directory in question (e.g. resolve to + // /dev/fd/2) + if r.base == "" { + linkTarget = filepath.Join(filepath.Dir(p), linkTarget) + } else { + // if the base is set, then we first need to resolve the link, + // before finding it's location in the base + dir, err := filepath.Rel(r.base, filepath.Dir(p)) + if err != nil { + return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err) + } + linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget))) + } } ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) @@ -371,12 +400,12 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) userStrPath = windowsToPosix(userStrPath) } - loc := NewVirtualLocationFromDirectory( - r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root - r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root - *ref, - ) - references = append(references, loc) + loc := NewVirtualLocationFromDirectory( + r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root + r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root + *ref, + ) + references = append(references, loc) } return references, nil diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index bd67ec7443bb..383b1d360d03 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -57,7 +57,7 @@ func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver, err := newDirectoryResolver(c.relativeRoot) + resolver, err := newDirectoryResolver(c.relativeRoot, "") assert.NoError(t, err) refs, err := resolver.FilesByPath(c.input) @@ -111,7 +111,7 @@ func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) { absRoot, err := filepath.Abs(c.relativeRoot) require.NoError(t, err) - resolver, err := newDirectoryResolver(absRoot) + resolver, err := newDirectoryResolver(absRoot, "") assert.NoError(t, err) refs, err := resolver.FilesByPath(c.input) @@ -172,7 +172,7 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver, err := newDirectoryResolver(c.root) + resolver, err := newDirectoryResolver(c.root, "") assert.NoError(t, err) hasPath := resolver.HasPath(c.input) @@ -220,7 +220,7 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures") + resolver, err := newDirectoryResolver("./test-fixtures", "") assert.NoError(t, err) refs, err := resolver.FilesByPath(c.input...) assert.NoError(t, err) @@ -233,7 +233,7 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures") + resolver, err := newDirectoryResolver("./test-fixtures", "") assert.NoError(t, err) refs, err := resolver.FilesByGlob("**/image-symlinks/file*") assert.NoError(t, err) @@ -242,7 +242,7 @@ func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { } func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks") + resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks", "") assert.NoError(t, err) refs, err := resolver.FilesByGlob("**/*.txt") assert.NoError(t, err) @@ -250,7 +250,7 @@ func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { } func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures") + resolver, err := newDirectoryResolver("./test-fixtures", "") assert.NoError(t, err) refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt") assert.NoError(t, err) @@ -277,7 +277,7 @@ func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "") assert.NoError(t, err) refs, err := resolver.FilesByPath(test.fixture) @@ -300,7 +300,7 @@ func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) { func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { // let's make certain that "dev/place" is not ignored, since it is not "/dev/place" - resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") + resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", "") assert.NoError(t, err) // ensure the correct filter function is wired up by default expectedFn := reflect.ValueOf(isUnallowableFileType) @@ -431,7 +431,7 @@ func Test_isUnallowableFileType(t *testing.T) { func Test_directoryResolver_index(t *testing.T) { // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex - r, err := newDirectoryResolver("test-fixtures/system_paths/target") + r, err := newDirectoryResolver("test-fixtures/system_paths/target", "") if err != nil { t.Fatalf("unable to get indexed dir resolver: %+v", err) } @@ -608,7 +608,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) { } for _, test := range tests { t.Run(test.fixturePath, func(t *testing.T) { - resolver, err := newDirectoryResolver(test.fixturePath) + resolver, err := newDirectoryResolver(test.fixturePath, "") assert.NoError(t, err) locations, err := resolver.FilesByMIMEType(test.mimeType) assert.NoError(t, err) @@ -621,7 +621,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) { } func Test_IndexingNestedSymLinks(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "") require.NoError(t, err) // check that we can get the real path @@ -674,7 +674,7 @@ func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { return strings.HasSuffix(path, string(filepath.Separator)+"readme") } - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn) + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "", filterFn) require.NoError(t, err) // the path to the real file is PRUNED from the index, so we should NOT expect a location returned @@ -694,7 +694,7 @@ func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { } func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root") + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root", "") require.NoError(t, err) // check that we can get the real path @@ -712,7 +712,7 @@ func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { } func Test_RootViaSymlink(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root") + resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root", "") require.NoError(t, err) locations, err := resolver.FilesByPath("./file1.txt") @@ -753,7 +753,7 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r, err := newDirectoryResolver(".") + r, err := newDirectoryResolver(".", "") require.NoError(t, err) actual, err := r.FileContentsByLocation(test.location) @@ -819,7 +819,7 @@ func Test_isUnixSystemRuntimePath(t *testing.T) { func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { test := func(t *testing.T) { - resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop") + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop", "") require.NoError(t, err) locations, err := resolver.FilesByGlob("**/file.target") @@ -853,7 +853,7 @@ func Test_IncludeRootPathInIndex(t *testing.T) { return path != "/" } - resolver, err := newDirectoryResolver("/", filterFn) + resolver, err := newDirectoryResolver("/", "", filterFn) require.NoError(t, err) exists, ref, err := resolver.fileTree.File(file.Path("/")) @@ -870,7 +870,7 @@ func TestDirectoryResolver_indexPath(t *testing.T) { tempFile, err := os.CreateTemp("", "") require.NoError(t, err) - resolver, err := newDirectoryResolver(tempFile.Name()) + resolver, err := newDirectoryResolver(tempFile.Name(), "") require.NoError(t, err) t.Run("filtering path with nil os.FileInfo", func(t *testing.T) { diff --git a/syft/source/metadata.go b/syft/source/metadata.go index feafba7bc3b2..1d29973b4ec5 100644 --- a/syft/source/metadata.go +++ b/syft/source/metadata.go @@ -6,5 +6,6 @@ type Metadata struct { Scheme Scheme // the source data scheme type (directory or image) ImageMetadata ImageMetadata // all image info (image only) Path string // the root path to be cataloged (directory only) + Base string // the base path to be cataloged (directory only) Name string } diff --git a/syft/source/source.go b/syft/source/source.go index d6060a46bdcb..ed96dd110614 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -32,6 +32,7 @@ type Source struct { Metadata Metadata directoryResolver *directoryResolver `hash:"ignore"` path string + base string mutex *sync.Mutex Exclusions []string `hash:"ignore"` } @@ -252,6 +253,11 @@ func NewFromDirectory(path string) (Source, error) { return NewFromDirectoryWithName(path, "") } +// NewFromDirectory creates a new source object tailored to catalog a given filesystem directory recursively. +func NewFromDirectoryRoot(path string) (Source, error) { + return NewFromDirectoryRootWithName(path, "") +} + // NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. func NewFromDirectoryWithName(path string, name string) (Source, error) { s := Source{ @@ -267,6 +273,23 @@ func NewFromDirectoryWithName(path string, name string) (Source, error) { return s, nil } +// NewFromDirectoryRootWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. +func NewFromDirectoryRootWithName(path string, name string) (Source, error) { + s := Source{ + mutex: &sync.Mutex{}, + Metadata: Metadata{ + Name: name, + Scheme: DirectoryScheme, + Path: path, + Base: path, + }, + path: path, + base: path, + } + s.SetID() + return s, nil +} + // NewFromFile creates a new source object tailored to catalog a file. func NewFromFile(path string) (Source, func()) { return NewFromFileWithName(path, "") @@ -428,7 +451,7 @@ func (s *Source) FileResolver(scope Scope) (FileResolver, error) { if err != nil { return nil, err } - resolver, err := newDirectoryResolver(s.path, exclusionFunctions...) + resolver, err := newDirectoryResolver(s.path, s.base, exclusionFunctions...) if err != nil { return nil, fmt.Errorf("unable to create directory resolver: %w", err) } diff --git a/syft/source/source_test.go b/syft/source/source_test.go index 1cd8a3b40dca..c172a3572c57 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -121,7 +121,7 @@ func TestSetID(t *testing.T) { Path: "test-fixtures/image-simple", }, }, - expected: artifact.ID("14b60020c4f9955"), + expected: artifact.ID("1b0dc351e6577b01"), }, } From d1d5fa1cb6fe8380397075834d81acbdfb26567a Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Fri, 20 Jan 2023 14:51:43 +0000 Subject: [PATCH 3/3] source: add tests for new base parameter Signed-off-by: Justin Chadwell --- syft/source/directory_resolver_test.go | 73 +++++++++++++++++++ syft/source/test-fixtures/symlinks-base/bar | 1 + syft/source/test-fixtures/symlinks-base/base | 0 syft/source/test-fixtures/symlinks-base/baz | 1 + syft/source/test-fixtures/symlinks-base/chain | 1 + syft/source/test-fixtures/symlinks-base/foo | 1 + .../test-fixtures/symlinks-base/sub/item | 0 .../test-fixtures/symlinks-base/sub/link | 1 + 8 files changed, 78 insertions(+) create mode 120000 syft/source/test-fixtures/symlinks-base/bar create mode 100644 syft/source/test-fixtures/symlinks-base/base create mode 120000 syft/source/test-fixtures/symlinks-base/baz create mode 120000 syft/source/test-fixtures/symlinks-base/chain create mode 120000 syft/source/test-fixtures/symlinks-base/foo create mode 100644 syft/source/test-fixtures/symlinks-base/sub/item create mode 120000 syft/source/test-fixtures/symlinks-base/sub/link diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index 383b1d360d03..7305c225b32d 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -885,3 +885,76 @@ func TestDirectoryResolver_indexPath(t *testing.T) { }) }) } + +func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) { + cases := []struct { + name string + root string + input string + expected []string + }{ + { + name: "should find the base file", + root: "./test-fixtures/symlinks-base/", + input: "./base", + expected: []string{ + "base", + }, + }, + { + name: "should follow a link with a pivoted root", + root: "./test-fixtures/symlinks-base/", + input: "./foo", + expected: []string{ + "base", + }, + }, + { + name: "should follow a relative link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./bar", + expected: []string{ + "base", + }, + }, + { + name: "should follow an absolute link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./baz", + expected: []string{ + "base", + }, + }, + { + name: "should follow an absolute link with extra parents", + root: "./test-fixtures/symlinks-base/", + input: "./sub/link", + expected: []string{ + "sub/item", + }, + }, + { + name: "should follow chained pivoted link", + root: "./test-fixtures/symlinks-base/", + input: "./chain", + expected: []string{ + "base", + }, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := newDirectoryResolver(c.root, c.root) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, len(c.expected)) + s := strset.New() + for _, actual := range refs { + s.Add(actual.RealPath) + } + assert.ElementsMatch(t, c.expected, s.List()) + }) + } +} diff --git a/syft/source/test-fixtures/symlinks-base/bar b/syft/source/test-fixtures/symlinks-base/bar new file mode 120000 index 000000000000..ec3f86cdc10b --- /dev/null +++ b/syft/source/test-fixtures/symlinks-base/bar @@ -0,0 +1 @@ +../../base \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-base/base b/syft/source/test-fixtures/symlinks-base/base new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/syft/source/test-fixtures/symlinks-base/baz b/syft/source/test-fixtures/symlinks-base/baz new file mode 120000 index 000000000000..e323b7944916 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-base/baz @@ -0,0 +1 @@ +/../../base \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-base/chain b/syft/source/test-fixtures/symlinks-base/chain new file mode 120000 index 000000000000..7370610cce70 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-base/chain @@ -0,0 +1 @@ +/foo \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-base/foo b/syft/source/test-fixtures/symlinks-base/foo new file mode 120000 index 000000000000..d947464b4269 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-base/foo @@ -0,0 +1 @@ +/base \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-base/sub/item b/syft/source/test-fixtures/symlinks-base/sub/item new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/syft/source/test-fixtures/symlinks-base/sub/link b/syft/source/test-fixtures/symlinks-base/sub/link new file mode 120000 index 000000000000..ae0223a40aec --- /dev/null +++ b/syft/source/test-fixtures/symlinks-base/sub/link @@ -0,0 +1 @@ +../sub/item \ No newline at end of file