Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Pack for deep, ignored directories #50

Merged
merged 2 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions internal/ignorefiles/ignorerules.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ type Ruleset struct {
rules []rule
}

// ExcludesResult is the result of matching a path against a Ruleset. A result
// is Excluded if it matches a set of paths that are excluded by the rules in a
// Ruleset. A matching result is Dominating if none of the rules that follow it
// contain a negation, implying that if the rule excludes a directory,
// everything below that directory may be ignored.
type ExcludesResult struct {
Excluded bool
Dominating bool
}

// ParseIgnoreFileContent takes a reader over the content of a .terraformignore
// file and returns the Ruleset described by that file, or an error if the
// file is invalid.
Expand Down Expand Up @@ -57,19 +67,20 @@ func LoadPackageIgnoreRules(packageDir string) (*Ruleset, error) {
// excluded by the rules in the ruleset.
//
// If any of the rules in the ruleset have invalid syntax then Excludes will
// return an error, but it will also still return a boolean result which
// return an error, but it will also still return a result which
// considers all of the remaining valid rules, to support callers that want to
// just ignore invalid exclusions. Such callers can safely ignore the error
// result:
//
// exc, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (bool, error) {
// exc, matching, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (ExcludesResult, error) {
if r == nil {
return false, nil
return ExcludesResult{}, nil
}

var retErr error
foundMatch := false
dominating := false
for _, rule := range r.rules {
match, err := rule.match(path)
if err != nil {
Expand All @@ -81,16 +92,20 @@ func (r *Ruleset) Excludes(path string) (bool, error) {
}
}
if match {
foundMatch = !rule.excluded
foundMatch = !rule.negated
dominating = foundMatch && !rule.negationsAfter
}
}
return foundMatch, retErr
return ExcludesResult{
Excluded: foundMatch,
Dominating: dominating,
}, retErr
}

// Includes is the inverse of [Ruleset.Excludes].
func (r *Ruleset) Includes(path string) (bool, error) {
notRet, err := r.Excludes(path)
return !notRet, err
result, err := r.Excludes(path)
return !result.Excluded, err
}

var DefaultRuleset *Ruleset
Expand Down
32 changes: 21 additions & 11 deletions internal/ignorefiles/terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func readRules(input io.Reader) ([]rule, error) {
rules := defaultExclusions
scanner := bufio.NewScanner(input)
scanner.Split(bufio.ScanLines)
currentRuleIndex := len(defaultExclusions) - 1

for scanner.Scan() {
pattern := scanner.Text()
Expand All @@ -32,8 +33,15 @@ func readRules(input io.Reader) ([]rule, error) {
rule := rule{}
// Exclusions
if pattern[0] == '!' {
rule.excluded = true
rule.negated = true
pattern = pattern[1:]
// Mark all previous rules as having negations after it
for i := currentRuleIndex; i >= 0; i-- {
if rules[i].negationsAfter {
break
}
rules[i].negationsAfter = true
}
}
// If it is a directory, add ** so we catch descendants
if pattern[len(pattern)-1] == os.PathSeparator {
Expand All @@ -49,6 +57,7 @@ func readRules(input io.Reader) ([]rule, error) {
rule.val = pattern
rule.dirs = strings.Split(pattern, string(os.PathSeparator))
rules = append(rules, rule)
currentRuleIndex += 1
}

if err := scanner.Err(); err != nil {
Expand All @@ -58,10 +67,11 @@ func readRules(input io.Reader) ([]rule, error) {
}

type rule struct {
val string // the value of the rule itself
excluded bool // ! is present, an exclusion rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
val string // the value of the rule itself
negated bool // prefixed by !, a negated rule
negationsAfter bool // negatied rules appear after this rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
}

func (r *rule) match(path string) (bool, error) {
Expand Down Expand Up @@ -160,16 +170,16 @@ func (r *rule) compile() error {

var defaultExclusions = []rule{
{
val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)),
excluded: false,
val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)),
negated: false,
},
{
val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)),
excluded: false,
val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)),
negated: false,
},
{
val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)),
excluded: true,
val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)),
negated: true,
},
}

Expand Down
43 changes: 41 additions & 2 deletions internal/ignorefiles/terraformignore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func TestTerraformIgnore(t *testing.T) {
if err != nil {
t.Fatal(err)
}

type file struct {
// the actual path, should be file path format /dir/subdir/file.extension
path string
Expand Down Expand Up @@ -112,13 +113,51 @@ func TestTerraformIgnore(t *testing.T) {
},
}
for i, p := range paths {
match, err := rs.Excludes(p.path)
result, err := rs.Excludes(p.path)
if err != nil {
t.Errorf("invalid rule syntax when checking %s at index %d", p.path, i)
continue
}
if match != p.match {
if result.Excluded != p.match {
t.Fatalf("%s at index %d should be %t", p.path, i, p.match)
}
}
}

func TestTerraformIgnoreNoExclusionOptimization(t *testing.T) {
rs, err := LoadPackageIgnoreRules("testdata/with-exclusion")
if err != nil {
t.Fatal(err)
}
if len(rs.rules) != 7 {
t.Fatalf("Expected 7 rules, got %d", len(rs.rules))
}

// reflects that no negations follow the last rule
afterValue := false
for i := len(rs.rules) - 1; i >= 0; i-- {
r := rs.rules[i]
if r.negationsAfter != afterValue {
t.Errorf("Expected exclusionsAfter to be %v at index %d", afterValue, i)
}
if r.negated {
afterValue = true
}
}

// last two will be dominating
for _, r := range []string{"logs/", "tmp/"} {
result, err := rs.Excludes(r)
if err != nil {
t.Fatal(err)
}
if !result.Dominating {
t.Errorf("Expected %q to be a dominating rule", r)
}
}

if actual, _ := rs.Excludes("src/baz/ignored"); !actual.Excluded {
t.Errorf("Expected %q to be excluded, but it was included", "src/baz/ignored")
}

}
4 changes: 2 additions & 2 deletions internal/ignorefiles/testdata/archive-dir/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
src/**/*
# except at one directory
!src/foo/bar.txt
logs/
tmp/
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ignored
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bar
1 change: 1 addition & 0 deletions internal/ignorefiles/testdata/with-exclusion/tmp/tmp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tmp
10 changes: 7 additions & 3 deletions slug.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,19 @@ func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta,
return nil
}

if m := matchIgnoreRules(subpath, ignoreRules); m {
if r := matchIgnoreRules(subpath, ignoreRules); r.Excluded {
return nil
}

// Catch directories so we don't end up with empty directories,
// the files are ignored correctly
if info.IsDir() {
if m := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); m {
return nil
if r := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); r.Excluded {
if r.Dominating {
return filepath.SkipDir
} else {
return nil
}
}
}

Expand Down
9 changes: 7 additions & 2 deletions sourcebundle/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Excluded {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand All @@ -642,12 +642,17 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat

// For directories we also need to check with a path separator on the
// end, which ignores entire subtrees.
//
// TODO: What about exclusion rules that follow a matching directory?
// Example:
// /logs
// !/logs/production/*
if info.IsDir() {
ignored, err := ignoreRules.Excludes(relPath + string(os.PathSeparator))
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Excluded {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand Down
2 changes: 1 addition & 1 deletion terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func parseIgnoreFile(rootPath string) *ignorefiles.Ruleset {
return ret
}

func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) bool {
func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) ignorefiles.ExcludesResult {
// Ruleset.Excludes explicitly allows ignoring its error, in which
// case we are ignoring any individual invalid rules in the set
// but still taking all of the others into account.
Expand Down
4 changes: 2 additions & 2 deletions testdata/archive-dir-no-external/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt
4 changes: 2 additions & 2 deletions testdata/archive-dir/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt