From 9d0ab16e32163800c6b463804cc217946e78a817 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Tue, 17 Aug 2021 12:41:22 -0700 Subject: [PATCH] Simplify walker to use PatternMatcher instead of custom matching This changes the pattern matching implementation in walker to use PatternMatcher, making it consistent with the "copy" code. This makes it support ** patterns as well, which formerly didn't work. The added benchmark shows the new implementation is slightly faster than the old one. --- prefix/match.go | 45 ------ prefix/match_test.go | 57 ------- walker.go | 224 ++++++++++++++++----------- walker_test.go | 357 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 493 insertions(+), 190 deletions(-) delete mode 100644 prefix/match.go delete mode 100644 prefix/match_test.go diff --git a/prefix/match.go b/prefix/match.go deleted file mode 100644 index 57e745c6..00000000 --- a/prefix/match.go +++ /dev/null @@ -1,45 +0,0 @@ -package prefix - -import ( - "path" - "path/filepath" - "strings" -) - -// Match matches a path against a pattern. It returns m = true if the path -// matches the pattern, and partial = true if the pattern has more separators -// than the path and the common components match (for example, name = foo and -// pattern = foo/bar/*). slashSeparator determines whether the path and pattern -// are '/' delimited (true) or use the native path separator (false). -func Match(pattern, name string, slashSeparator bool) (m bool, partial bool) { - separator := filepath.Separator - if slashSeparator { - separator = '/' - } - count := strings.Count(name, string(separator)) - if strings.Count(pattern, string(separator)) > count { - pattern = trimUntilIndex(pattern, string(separator), count) - partial = true - } - if slashSeparator { - m, _ = path.Match(pattern, name) - } else { - m, _ = filepath.Match(pattern, name) - } - return m, partial -} - -func trimUntilIndex(str, sep string, count int) string { - s := str - i := 0 - c := 0 - for { - idx := strings.Index(s, sep) - s = s[idx+len(sep):] - i += idx + len(sep) - c++ - if c > count { - return str[:i-len(sep)] - } - } -} diff --git a/prefix/match_test.go b/prefix/match_test.go deleted file mode 100644 index bbb253ba..00000000 --- a/prefix/match_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package prefix - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestMatch(t *testing.T) { - ok, partial := Match("foo", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("foo/bar/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bar", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bax", false) - assert.Equal(t, false, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/bar/baz", "foo/bar/baz", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("f*", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) - - ok, partial = Match("foo/bar/*", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("foo/*/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/*/baz", "foo", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/bar/baz", "foo/bar", false) - assert.Equal(t, true, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/bar/baz", "foo/bax", false) - assert.Equal(t, false, ok) - assert.Equal(t, true, partial) - - ok, partial = Match("*/*/baz", "foo/bar/baz", false) - assert.Equal(t, true, ok) - assert.Equal(t, false, partial) -} diff --git a/walker.go b/walker.go index 2eb1bc06..f93434dc 100644 --- a/walker.go +++ b/walker.go @@ -10,7 +10,6 @@ import ( "github.com/docker/docker/pkg/fileutils" "github.com/pkg/errors" - "github.com/tonistiigi/fsutil/prefix" "github.com/tonistiigi/fsutil/types" ) @@ -36,20 +35,15 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return errors.WithStack(&os.PathError{Op: "walk", Path: root, Err: syscall.ENOTDIR}) } - var pm *fileutils.PatternMatcher - if opt != nil && opt.ExcludePatterns != nil { - pm, err = fileutils.NewPatternMatcher(opt.ExcludePatterns) - if err != nil { - return errors.Wrapf(err, "invalid excludepatterns: %s", opt.ExcludePatterns) - } - } + var ( + includePatterns []string + includeMatcher *fileutils.PatternMatcher + excludeMatcher *fileutils.PatternMatcher + ) - var includePatterns []string if opt != nil && opt.IncludePatterns != nil { includePatterns = make([]string, len(opt.IncludePatterns)) - for k := range opt.IncludePatterns { - includePatterns[k] = filepath.Clean(opt.IncludePatterns[k]) - } + copy(includePatterns, opt.IncludePatterns) } if opt != nil && opt.FollowPaths != nil { targets, err := FollowLinks(p, opt.FollowPaths) @@ -61,13 +55,32 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err includePatterns = dedupePaths(includePatterns) } } + if len(includePatterns) != 0 { + includeMatcher, err = fileutils.NewPatternMatcher(includePatterns) + if err != nil { + return errors.Wrapf(err, "invalid includepatterns: %s", opt.IncludePatterns) + } + } - var ( - lastIncludedDir string + if opt != nil && opt.ExcludePatterns != nil { + excludeMatcher, err = fileutils.NewPatternMatcher(opt.ExcludePatterns) + if err != nil { + return errors.Wrapf(err, "invalid excludepatterns: %s", opt.ExcludePatterns) + } + } - parentDirs []string // used only for exclude handling - parentMatchedExclude []bool - ) + type visitedDir struct { + fi os.FileInfo + path string + origpath string + pathWithSep string + matchedInclude bool + matchedExclude bool + calledFn bool + } + + // used only for include/exclude handling + var parentDirs []visitedDir seenFiles := make(map[uint64]string) return filepath.Walk(root, func(path string, fi os.FileInfo, err error) (retErr error) { @@ -90,87 +103,84 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return nil } - if opt != nil { - if includePatterns != nil { - skip := false - if lastIncludedDir != "" { - if strings.HasPrefix(path, lastIncludedDir+string(filepath.Separator)) { - skip = true - } - } + var dir visitedDir - if !skip { - matched := false - partial := true - for _, pattern := range includePatterns { - if ok, p := prefix.Match(pattern, path, false); ok { - matched = true - if !p { - partial = false - break - } - } - } - if !matched { - if fi.IsDir() { - return filepath.SkipDir - } - return nil - } - if !partial && fi.IsDir() { - lastIncludedDir = path - } + if includeMatcher != nil || excludeMatcher != nil { + for len(parentDirs) != 0 { + lastParentDir := parentDirs[len(parentDirs)-1].pathWithSep + if strings.HasPrefix(path, lastParentDir) { + break } + parentDirs = parentDirs[:len(parentDirs)-1] } - if pm != nil { - for len(parentMatchedExclude) != 0 { - lastParentDir := parentDirs[len(parentDirs)-1] - if strings.HasPrefix(path, lastParentDir) { - break - } - parentDirs = parentDirs[:len(parentDirs)-1] - parentMatchedExclude = parentMatchedExclude[:len(parentMatchedExclude)-1] - } - var m bool - if len(parentMatchedExclude) != 0 { - m, err = pm.MatchesUsingParentResult(path, parentMatchedExclude[len(parentMatchedExclude)-1]) - } else { - m, err = pm.MatchesOrParentMatches(path) + if fi.IsDir() { + dir = visitedDir{ + fi: fi, + path: path, + origpath: origpath, + pathWithSep: path + string(filepath.Separator), } - if err != nil { - return errors.Wrap(err, "failed to match excludepatterns") + } + } + + skip := false + + if includeMatcher != nil { + var parentMatchedInclude *bool + if len(parentDirs) != 0 { + parentMatchedInclude = &parentDirs[len(parentDirs)-1].matchedInclude + } + m, err := matchesPatterns(includeMatcher, path, parentMatchedInclude) + if err != nil { + return errors.Wrap(err, "failed to match includepatterns") + } + + if fi.IsDir() { + dir.matchedInclude = m + } + + if !m { + skip = true + } + } + + if excludeMatcher != nil { + var parentMatchedExclude *bool + if len(parentDirs) != 0 { + parentMatchedExclude = &parentDirs[len(parentDirs)-1].matchedExclude + } + m, err := matchesPatterns(excludeMatcher, path, parentMatchedExclude) + if err != nil { + return errors.Wrap(err, "failed to match excludepatterns") + } + + if fi.IsDir() { + dir.matchedExclude = m + } + + if m { + if fi.IsDir() && !excludeMatcher.Exclusions() { + return filepath.SkipDir } + skip = true + } + } - var dirSlash string + if includeMatcher != nil || excludeMatcher != nil { + defer func() { if fi.IsDir() { - dirSlash = path + string(filepath.Separator) - parentDirs = append(parentDirs, dirSlash) - parentMatchedExclude = append(parentMatchedExclude, m) + parentDirs = append(parentDirs, dir) } + }() + } - if m { - if fi.IsDir() { - if !pm.Exclusions() { - return filepath.SkipDir - } - for _, pat := range pm.Patterns() { - if !pat.Exclusion() { - continue - } - patStr := pat.String() + string(filepath.Separator) - if strings.HasPrefix(patStr, dirSlash) { - goto passedFilter - } - } - return filepath.SkipDir - } - return nil - } - } + if skip { + return nil } - passedFilter: + dir.calledFn = true + stat, err := mkstat(origpath, path, fi, seenFiles) if err != nil { return err @@ -185,6 +195,31 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err return nil } } + for i, parentDir := range parentDirs { + if parentDir.calledFn { + continue + } + parentStat, err := mkstat(parentDir.origpath, parentDir.path, parentDir.fi, seenFiles) + if err != nil { + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + if opt != nil && opt.Map != nil { + if allowed := opt.Map(parentStat.Path, parentStat); !allowed { + continue + } + } + + if err := fn(parentStat.Path, &StatInfo{parentStat}, nil); err != nil { + return err + } + parentDirs[i].calledFn = true + } if err := fn(stat.Path, &StatInfo{stat}, nil); err != nil { return err } @@ -193,6 +228,23 @@ func Walk(ctx context.Context, p string, opt *WalkOpt, fn filepath.WalkFunc) err }) } +func matchesPatterns(pm *fileutils.PatternMatcher, path string, parentMatched *bool) (bool, error) { + var ( + m bool + err error + ) + if parentMatched != nil { + m, err = pm.MatchesUsingParentResult(path, *parentMatched) + } else { + m, err = pm.MatchesOrParentMatches(path) + } + if err != nil { + return false, err + } + + return m, nil +} + type StatInfo struct { *types.Stat } diff --git a/walker_test.go b/walker_test.go index 8b9f2d35..2b71827c 100644 --- a/walker_test.go +++ b/walker_test.go @@ -87,8 +87,7 @@ file bar/foo }, bufWalk(b)) assert.NoError(t, err) - assert.Equal(t, `dir bar -`, string(b.Bytes())) + assert.Empty(t, b.Bytes()) b.Reset() err = Walk(context.Background(), d, &WalkOpt{ @@ -305,3 +304,357 @@ func tmpDir(inp []*change) (dir string, retErr error) { } return tmpdir, nil } + +func BenchmarkWalker(b *testing.B) { + for _, scenario := range []struct { + maxDepth int + pattern string + expected int + }{{ + maxDepth: 1, + pattern: "target", + expected: 1, + }, { + maxDepth: 1, + pattern: "**/target", + expected: 1, + }, { + maxDepth: 2, + pattern: "*/target", + expected: 52, + }, { + maxDepth: 2, + pattern: "**/target", + expected: 52, + }, { + maxDepth: 3, + pattern: "*/*/target", + expected: 1378, + }, { + maxDepth: 3, + pattern: "**/target", + expected: 1378, + }, { + maxDepth: 4, + pattern: "*/*/*/target", + expected: 2794, + }, { + maxDepth: 4, + pattern: "**/target", + expected: 2794, + }, { + maxDepth: 5, + pattern: "*/*/*/*/target", + expected: 1405, + }, { + maxDepth: 5, + pattern: "**/target", + expected: 1405, + }, { + maxDepth: 6, + pattern: "*/*/*/*/*/target", + expected: 2388, + }, { + maxDepth: 6, + pattern: "**/target", + expected: 2388, + }} { + scenario := scenario // copy loop var + b.Run(fmt.Sprintf("[%d]-%s", scenario.maxDepth, scenario.pattern), func(b *testing.B) { + tmpdir, err := ioutil.TempDir("", "walk") + if err != nil { + b.Error(err) + } + defer func() { + b.StopTimer() + os.RemoveAll(tmpdir) + }() + mkBenchTree(tmpdir, scenario.maxDepth, 1) + + // don't include time to setup dirs in benchmark + b.ResetTimer() + + for i := 0; i < b.N; i++ { + count := 0 + err = Walk(context.Background(), tmpdir, &WalkOpt{ + IncludePatterns: []string{scenario.pattern}, + }, func(path string, fi os.FileInfo, err error) error { + count++ + return nil + }) + if err != nil { + b.Error(err) + } + if count != scenario.expected { + b.Errorf("Got count %d, expected %d", count, scenario.expected) + } + } + }) + } + +} + +func TestWalkerDoublestarInclude(t *testing.T) { + d, err := tmpDir(changeStream([]string{ + "ADD a dir", + "ADD a/b dir", + "ADD a/b/baz dir", + "ADD a/b/bar dir ", + "ADD a/b/bar/foo file", + "ADD a/b/bar/fop file", + "ADD bar dir", + "ADD bar/foo file", + "ADD baz dir", + "ADD foo2 file", + "ADD foo dir", + "ADD foo/bar dir", + "ADD foo/bar/bee file", + })) + + assert.NoError(t, err) + defer os.RemoveAll(d) + b := &bytes.Buffer{} + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir a/b/baz + dir bar + file bar/foo + dir baz + dir foo + dir foo/bar + file foo/bar/bee + file foo2 + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/foo"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir a/b/baz + dir bar + file bar/foo + dir baz + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/bar/g*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ``, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + dir foo + dir foo/bar + file foo/bar/bee + file foo2 + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*/f*"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + file a/b/bar/fop + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/b*/foo"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/bar + file a/b/bar/foo + dir bar + file bar/foo + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/foo/**"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir foo + dir foo/bar + file foo/bar/bee + `, string(b.Bytes())) + + b.Reset() + err = Walk(context.Background(), d, &WalkOpt{ + IncludePatterns: []string{"**/baz"}, + }, bufWalk(b)) + assert.NoError(t, err) + + trimEqual(t, ` + dir a + dir a/b + dir a/b/baz + dir baz + `, string(b.Bytes())) +} + +func trimEqual(t assert.TestingT, expected, actual string, msgAndArgs ...interface{}) bool { + lines := []string{} + for _, line := range strings.Split(expected, "\n") { + line = strings.TrimSpace(line) + if line != "" { + lines = append(lines, line) + } + } + lines = append(lines, "") // we expect a trailing newline + expected = strings.Join(lines, "\n") + + return assert.Equal(t, expected, actual, msgAndArgs) +} + +// mkBenchTree will create directories named a-z recursively +// up to 3 layers deep. If maxDepth is > 3 we will shorten +// the last letter to prevent the generated inodes going over +// 25k. The final directory in the tree will contain only files. +// Additionally there is a single file named `target` +// in each leaf directory. +func mkBenchTree(dir string, maxDepth, depth int) error { + end := 'z' + switch maxDepth { + case 1, 2, 3: + end = 'z' // max 19682 inodes + case 4: + end = 'k' // max 19030 inodes + case 5: + end = 'e' // max 12438 inodes + case 6: + end = 'd' // max 8188 inodes + case 7, 8: + end = 'c' // max 16398 inodes + case 9, 10, 11, 12: + end = 'b' // max 16378 inodes + default: + panic("depth cannot be > 12, would create too many files") + } + + if depth == maxDepth { + fd, err := os.Create(filepath.Join(dir, "target")) + if err != nil { + return err + } + fd.Close() + } + for r := 'a'; r <= end; r++ { + p := filepath.Join(dir, string(r)) + if depth == maxDepth { + fd, err := os.Create(p) + if err != nil { + return err + } + fd.Close() + } else { + err := os.Mkdir(p, 0755) + if err != nil { + return err + } + err = mkBenchTree(p, maxDepth, depth+1) + if err != nil { + return err + } + } + } + return nil +}