From aa71a4ca319d67550c8db59f9158165af96c3a28 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Fri, 25 Oct 2024 10:10:04 -0400 Subject: [PATCH] fastwalk: don't clean the path argument to Walk on Windows On Windows don't attempt to clean the path argument to Walk since the existing clean logic (cleanRootPath) transforms paths like "C:\" => "C:" which are not equivalent. This logic only existed to make the joining of paths simpler and should probably be removed since we shouldn't be modifying user provided paths. TODO: Investigate if anything relies on the current clean logic and remove it if nothing does. Fixes: https://github.com/charlievieth/fastwalk/issues/37 --- dirent_export_test.go | 57 ++++++++---- fastwalk.go | 16 +++- fastwalk_test.go | 202 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+), 21 deletions(-) diff --git a/dirent_export_test.go b/dirent_export_test.go index a6acea4..d10b721 100644 --- a/dirent_export_test.go +++ b/dirent_export_test.go @@ -4,6 +4,7 @@ import ( "fmt" "io/fs" "os" + "runtime" "testing" "time" ) @@ -29,26 +30,44 @@ func FormatFileInfo(fi fs.FileInfo) string { }) } +// NB: this test lives here and not in fastwalk_test.go since we need to +// access the internal cleanRootPath function. func TestCleanRootPath(t *testing.T) { - tests := map[string]string{ - "": "", - "/": "/", - "//": "/", - "/foo": "/foo", - "/foo/": "/foo", - "a": "a", - `C:/`: `C:`, - } - if os.PathSeparator != '/' { - const sep = string(os.PathSeparator) - tests["C:"+sep] = `C:` - tests["C:"+sep+sep] = `C:` - tests[sep+sep] = sep - } - for in, want := range tests { - got := cleanRootPath(in) - if got != want { - t.Errorf("cleanRootPath(%q) = %q; want: %q", in, got, want) + test := func(t *testing.T, tests map[string]string) { + t.Helper() + for in, want := range tests { + got := cleanRootPath(in) + if got != want { + t.Errorf("cleanRootPath(%q) = %q; want: %q", in, got, want) + } } } + // NB: The name here isn't exactly correct since we run this for + // any non-Windows OS. + t.Run("Unix", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("test not supported on Windows") + } + test(t, map[string]string{ + "": "", + ".": ".", + "/": "/", + "//": "/", + "/foo": "/foo", + "/foo/": "/foo", + "a": "a", + }) + }) + // Test that cleanRootPath is a no-op on Windows + t.Run("Windows", func(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("test only supported on Windows") + } + test(t, map[string]string{ + `C:/`: `C:/`, + `C://`: `C://`, + `\\?\GLOBALROOT`: `\\?\GLOBALROOT`, + `\\?\GLOBALROOT\\`: `\\?\GLOBALROOT\\`, + }) + }) } diff --git a/fastwalk.go b/fastwalk.go index 6d044ec..e669d5a 100644 --- a/fastwalk.go +++ b/fastwalk.go @@ -563,12 +563,14 @@ func (w *walker) joinPaths(dir, base string) string { // Handle the case where the root path argument to Walk is "/" // without this the returned path is prefixed with "//". if os.PathSeparator == '/' { - if dir == "/" { + if len(dir) != 0 && dir[len(dir)-1] == '/' { return dir + base } return dir + "/" + base } - // TODO: handle the above case of the argument to Walk being "/" + if len(dir) != 0 && os.IsPathSeparator(dir[len(dir)-1]) { + return dir + base + } if w.toSlash { return dir + "/" + base } @@ -625,7 +627,17 @@ func (w *walker) walk(root string, info DirEntry, runUserCallback bool) error { return nil } +// cleanRootPath returns the root path trimmed of extraneous trailing slashes. +// This is a no-op on Windows. func cleanRootPath(root string) string { + if runtime.GOOS == "windows" || len(filepath.VolumeName(root)) != 0 { + // Windows paths or any path with a volume name (which AFAIK should + // only be Windows) are a bit too complicated to clean. + return root + } + if len(filepath.VolumeName(root)) != 0 { + return root + } for i := len(root) - 1; i >= 0; i-- { if !os.IsPathSeparator(root[i]) { return root[:i+1] diff --git a/fastwalk_test.go b/fastwalk_test.go index 6f0ecc6..63d0718 100644 --- a/fastwalk_test.go +++ b/fastwalk_test.go @@ -12,6 +12,7 @@ import ( "os" "path/filepath" "reflect" + "regexp" "runtime" "sort" "strings" @@ -388,6 +389,207 @@ func TestFastWalk_LongPath(t *testing.T) { } } +func TestFastWalk_WindowsRootPaths(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("test only supported on Windows") + } + + sameFile := func(t *testing.T, name1, name2 string) bool { + fi1, err := os.Stat(name1) + if err != nil { + t.Fatal(err) + } + fi2, err := os.Stat(name2) + if err != nil { + t.Fatal(err) + } + return os.SameFile(fi1, fi2) + } + + walk := func(t *testing.T, root string) map[string]fs.DirEntry { + var mu sync.Mutex + seen := make(map[string]fs.DirEntry) + errStop := errors.New("errStop") + fn := func(path string, de fs.DirEntry, err error) error { + if err != nil { + return err + } + mu.Lock() + seen[path] = de + mu.Unlock() + if path != root && de.IsDir() { + return fs.SkipDir + } + return nil + } + err := fastwalk.Walk(nil, root, fastwalk.IgnorePermissionErrors(fn)) + if err != nil && err != errStop { + t.Fatal(err) + } + if len(seen) <= 1 { + // If we are a child of the root directory we should have visited at + // least two entries: the root itself and a directory that leads to, + // or is, our current working directory. + t.Fatalf("empty directory: %s", root) + } + return seen + } + + pwd, err := filepath.Abs(".") + if err != nil { + t.Fatal(err) + } + + vol := filepath.VolumeName(pwd) + if !regexp.MustCompile(`^[A-Za-z]:$`).MatchString(vol) { + // Ignore UNC names and other weird Windows paths to keep this simple. + t.Skipf("unsupported volume name: %s for path: %s", vol, pwd) + } + if !sameFile(t, pwd, vol) { + t.Skipf("skipping %s and %s should be considered the same file", pwd, vol) + } + + // Test that walking the disk root ("C:\") actually walks the disk root. + // Previously, there was a bug where the path "C:\" was transformed to "C:" + // before walking which caused fastwalk to walk the current directory. + // + // https://github.com/charlievieth/fastwalk/issues/37 + t.Run("FullyQualified", func(t *testing.T) { + root := vol + `\` + if sameFile(t, pwd, root) { + t.Skipf("the current working directory (%s) is the disk root: %s", pwd, root) + } + seen := walk(t, root) + + // Make sure we don't append an extraneous slash to the root ("C:\" => "C:\\a"). + for path := range seen { + rest := strings.TrimPrefix(path, vol) + if strings.Contains(rest, `\\`) { + t.Errorf(`path contains multiple consecutive slashes after volume (%s): "%s"`, + vol, path) + } + if s := filepath.Clean(path); s != path { + t.Errorf(`filepath.Clean("%s") == "%s"`, path, s) + } + } + + // Make sure we didn't walk the current directory. This will happen if + // the root argument to Walk is a drive letter ("C:\") but we strip off + // the trailing slash ("C:\" => "C:") since this makes the path relative + // to the current directory on drive "C". + // + // See: https://github.com/charlievieth/fastwalk/issues/37 + // + // Docs: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#fully-qualified-vs-relative-paths + for path, de := range seen { + if path == root { + // Ignore root since filepath.Base("C:\") == "\" and "C:\" and "\" + // are equivalent. + continue + } + fi1, err := de.Info() + if err != nil { + if os.IsNotExist(err) || os.IsPermission(err) { + continue + } + t.Fatal(err) + } + name := filepath.Base(path) + fi2, err := os.Lstat(name) + if err != nil { + continue + } + if os.SameFile(fi1, fi2) { + t.Errorf("Walking root (%s) returned entries for the current working "+ + "directory (%s): file %s is the same as %s", root, pwd, path, name) + } + } + + // Add file base name mappings + for _, de := range seen { + seen[de.Name()] = de + } + + // Make sure we read some files from the disk root. + des, err := os.ReadDir(root) + if err != nil { + t.Fatal(err) + } + if len(des) == 0 { + t.Fatalf("Disk root %s contains no files!", root) + } + same := 0 + for _, d2 := range des { + d1 := seen[d2.Name()] + if d1 == nil { + continue + } + fi1, err := d1.Info() + if err != nil { + t.Log(err) + continue + } + fi2, err := d2.Info() + if err != nil { + t.Log(err) + continue + } + if os.SameFile(fi1, fi2) { + same++ + } + } + // TODO: Expect to see N% of files and use + // a more descriptive error message + if same == 0 { + t.Fatalf(`Error failed to walk dist root: "%s"`, root) + } + }) + + // Test that paths like "C:" are treated as a relative path. + t.Run("Relative", func(t *testing.T) { + seen := walk(t, vol) + + // Make sure we don't append an extraneous slash to the root ("C:\" => "C:\\a"). + for path := range seen { + rest := strings.TrimPrefix(path, vol) + if strings.Contains(rest, `\\`) { + t.Errorf(`path contains multiple consecutive slashes after volume (%s): "%s"`, + vol, path) + } + if path == vol { + continue // Clean("C:") => "C:." + } + if s := filepath.Clean(path); s != path { + t.Errorf(`filepath.Clean("%s") == "%s"`, path, s) + } + } + + // Make sure we walk the current directory. + for path, de := range seen { + if path == vol { + // Ignore the volume since filepath.Base("C:") == "\" and "C:" and "\" + // are not equivalent. + continue + } + fi1, err := de.Info() + if err != nil { + t.Fatal(err) + } + name := filepath.Base(path) + fi2, err := os.Lstat(name) + if err != nil { + // NB: This test will fail if this file is removed while it's + // running. There are workarounds for this, but for now it's + // simpler to just error if that happens. + t.Fatal(err) + } + if !os.SameFile(fi1, fi2) { + t.Errorf("Expected files (%s) and (%s) to be the same", path, name) + } + } + }) +} + func TestFastWalk_Symlink(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one",