Skip to content

Commit

Permalink
refactor: change pathTrie per-file to per-directory (#1931)
Browse files Browse the repository at this point in the history
Now the `pathTrie` aligns with the directory tree, not full fs tree
which includes files. Aligning with the directory tree makes more sense
to me, and I think will make it simpler to do things like load BUILDs
concurrently while doing the fs-traversal (and persisting that in the
`pathTrie`) etc.

**What type of PR is this?**

> Other

**What package or component does this PR mostly affect?**

> all

**What does this PR do? Why is it needed?**

Performance + simplifying things for future changes.
  • Loading branch information
jbedard authored Nov 10, 2024
1 parent bf8c993 commit ed19735
Showing 1 changed file with 31 additions and 33 deletions.
64 changes: 31 additions & 33 deletions walk/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"os"
"path"
"path/filepath"
"sort"
"strings"

"github.com/bazelbuild/bazel-gazelle/config"
Expand Down Expand Up @@ -135,19 +134,10 @@ func Walk(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode,
func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[string]bool, updateRels *UpdateFilter, trie *pathTrie, wf WalkFunc, rel string, updateParent bool) {
haveError := false

ents := make([]fs.DirEntry, 0, len(trie.children))
for _, node := range trie.children {
ents = append(ents, *node.entry)
}

sort.Slice(ents, func(i, j int) bool {
return ents[i].Name() < ents[j].Name()
})

// Absolute path to the directory being visited
dir := filepath.Join(c.RepoRoot, rel)

f, err := loadBuildFile(c, rel, dir, ents)
f, err := loadBuildFile(c, rel, dir, trie.files)
if err != nil {
log.Print(err)
if c.Strict {
Expand All @@ -165,28 +155,35 @@ func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[stri
return
}

var subdirs, regularFiles []string
for _, ent := range ents {
// Filter and collect files
var regularFiles []string
for _, ent := range trie.files {
base := ent.Name()
entRel := path.Join(rel, base)
if wc.isExcluded(entRel) {
continue
}
ent := resolveFileInfo(wc, dir, entRel, ent)
switch {
case ent == nil:
continue
case ent.IsDir():
subdirs = append(subdirs, base)
default:
if ent := resolveFileInfo(wc, dir, entRel, ent); ent != nil {
regularFiles = append(regularFiles, base)
}
}

shouldUpdate := updateRels.shouldUpdate(rel, updateParent)
for _, sub := range subdirs {
if subRel := path.Join(rel, sub); updateRels.shouldVisit(subRel, shouldUpdate) {
visit(c, cexts, knownDirectives, updateRels, trie.children[sub], wf, subRel, shouldUpdate)

// Filter and collect subdirectories
var subdirs []string
for _, t := range trie.children {
base := t.entry.Name()
entRel := path.Join(rel, base)
if wc.isExcluded(entRel) {
continue
}
if ent := resolveFileInfo(wc, dir, entRel, t.entry); ent != nil {
subdirs = append(subdirs, base)

if updateRels.shouldVisit(entRel, shouldUpdate) {
visit(c, cexts, knownDirectives, updateRels, t, wf, entRel, shouldUpdate)
}
}
}

Expand Down Expand Up @@ -361,19 +358,20 @@ func resolveFileInfo(wc *walkConfig, dir, rel string, ent fs.DirEntry) fs.DirEnt
}

type pathTrie struct {
children map[string]*pathTrie
entry *fs.DirEntry
entry fs.DirEntry
files []fs.DirEntry
children []*pathTrie
}

// Basic factory method to ensure the entry is properly copied
func newTrie(entry fs.DirEntry) *pathTrie {
return &pathTrie{entry: &entry}
return &pathTrie{
entry: entry,
}
}

func buildTrie(c *config.Config, isIgnored isIgnoredFunc) (*pathTrie, error) {
trie := &pathTrie{
children: map[string]*pathTrie{},
}
trie := &pathTrie{}

// A channel to limit the number of concurrent goroutines
limitCh := make(chan struct{}, 100)
Expand Down Expand Up @@ -406,14 +404,14 @@ func walkDir(root, rel string, eg *errgroup.Group, limitCh chan struct{}, isIgno
continue
}

entryTrie := newTrie(entry)
trie.children[entry.Name()] = entryTrie

if entry.IsDir() {
entryTrie.children = map[string]*pathTrie{}
entryTrie := newTrie(entry)
trie.children = append(trie.children, entryTrie)
eg.Go(func() error {
return walkDir(root, entryPath, eg, limitCh, isIgnored, entryTrie)
})
} else {
trie.files = append(trie.files, entry)
}
}
return nil
Expand Down

0 comments on commit ed19735

Please sign in to comment.