Skip to content

Commit

Permalink
(maint) Do not recurse into recursive globbed exclusions when purging
Browse files Browse the repository at this point in the history
When we purge at the environment level we create recurse globs of our
Puppetfile managed directories, eg "<environment>/modules/**/*".

Previous to this patch we would collect a list of all content in the
environment and then match them against this recursive glob using
`File.fnmatch?`. This means we could be doing fnmatches against hundreds
of thousands of then-filtered results which can become prohibitively
slow.

This patch removes the usage of Dir.glob to retrieve all files and
instead manually recurses into child directories if set to recurse and
the child does not match either the purge_exclusions or whitelist globs.
However, because we match using a glob something like
"<environment>/modules/**/*" won't match "<environment>/modules", but
instead will filter each module within "<environment>/modules". In large
deployments this may mean filtering over a thousand child files, but
will prevent us from recursing into any of the modules themselves.

This maintains the post-recursion filtering of "desired_contents" as
this is how git-backed, environment maintained files are cleaned.
  • Loading branch information
justinstoller committed Jul 2, 2021
1 parent 709e040 commit 9070c6a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 10 deletions.
31 changes: 27 additions & 4 deletions lib/r10k/util/purgeable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ def current_contents(recurse)
end
end

# @deprecated Unused helper function
# @return [Array<String>] Directory contents that are expected but not present
def pending_contents(recurse)
desired_contents - current_contents(recurse)
end

# @return [Array<String>] Directory contents that are present but not expected
def stale_contents(recurse, exclusions, whitelist)
fn_match_opts = File::FNM_PATHNAME | File::FNM_DOTMATCH
def potentially_purgeable(dir, exclusions, whitelist, recurse, fn_match_opts)
children = Pathname.new(dir).children.reject do |item|
item = item.to_s

(current_contents(recurse) - desired_contents).reject do |item|
if exclusion_match = exclusions.find { |ex_item| (ex_item == item) || File.fnmatch?(ex_item, item, fn_match_opts) }
logger.debug2 _("Not purging %{item} due to internal exclusion match: %{exclusion_match}") % {item: item, exclusion_match: exclusion_match}
elsif whitelist_match = whitelist.find { |wl_item| (wl_item == item) || File.fnmatch?(wl_item, item, fn_match_opts) }
Expand All @@ -56,6 +56,29 @@ def stale_contents(recurse, exclusions, whitelist)

!!exclusion_match || !!whitelist_match
end

children.flat_map do |child|
if File.directory?(child) && recurse
potentially_purgeable(child, exclusions, whitelist, recurse, fn_match_opts)
else
child.to_s
end
end.compact
end

# @return [Array<String>] Directory contents that are present but not expected
def stale_contents(recurse, exclusions, whitelist)
dirs = self.managed_directories
desired = self.desired_contents
fn_match_opts = File::FNM_PATHNAME | File::FNM_DOTMATCH

initial_purgelist = dirs.flat_map do |dir|
potentially_purgeable(dir, exclusions, whitelist, recurse, fn_match_opts)
end

initial_purgelist.reject do |item|
!!desired.find { |des_item| (des_item == item) || File.fnmatch?(des_item, item, fn_match_opts) }
end
end

# Forcibly remove all unmanaged content in `self.managed_directories`
Expand Down
Empty file.
42 changes: 36 additions & 6 deletions spec/unit/util/purgeable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,12 @@

describe '#current_contents' do
it 'collects contents of all managed directories recursively' do
expect(subject.current_contents(recurse)).to contain_exactly(/\/expected_1/, /\/expected_2/, /\/unmanaged_1/, /\/unmanaged_2/, /\/managed_subdir_1/, /\/subdir_expected_1/, /\/subdir_unmanaged_1/)
expect(subject.current_contents(recurse)).
to contain_exactly(/\/expected_1/, /\/expected_2/,
/\/unmanaged_1/, /\/unmanaged_2/,
/\/managed_subdir_1/,
/\/subdir_expected_1/, /\/subdir_unmanaged_1/,
/\/managed_subdir_2/, /\/ignored_1/)
end
end

Expand All @@ -114,7 +119,8 @@
let(:whitelist) { [] }

it 'collects current_contents that should not exist recursively' do
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_1/, /\/unmanaged_2/, /\/subdir_unmanaged_1/)
expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_1/, /\/unmanaged_2/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end
end

Expand All @@ -124,7 +130,17 @@

it 'collects current_contents that should not exist except whitelisted items' do
expect(subject.logger).to receive(:debug).with(/unmanaged_1.*whitelist match/i)
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_2/, /\/subdir_unmanaged_1/)

expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end

it 'does not collect contents that match recursive globbed whitelist items as intermediate values' do
recursive_whitelist = ['**/managed_subdir_1/**/*']
expect(subject.logger).not_to receive(:debug).with(/ignored_1/)

expect(subject.stale_contents(recurse, exclusions, recursive_whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/managed_one\/unmanaged_1/)
end
end

Expand All @@ -134,7 +150,17 @@

it 'collects current_contents that should not exist except excluded items' do
expect(subject.logger).to receive(:debug2).with(/unmanaged_2.*internal exclusion match/i)
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_1/, /\/subdir_unmanaged_1/)

expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_1/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end

it 'does not collect contents that match recursive globbed exclusion items as intermediate values' do
recursive_exclusions = ['**/managed_subdir_1/**/*']
expect(subject.logger).not_to receive(:debug).with(/ignored_1/)

expect(subject.stale_contents(recurse, recursive_exclusions, whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/managed_one\/unmanaged_1/)
end
end
end
Expand Down Expand Up @@ -179,7 +205,11 @@
end

context "recursive whitelist glob" do
let(:whitelist) { managed_directories.collect { |dir| File.join(dir, "**", "*unmanaged*") } }
let(:whitelist) do
managed_directories.flat_map do |dir|
[File.join(dir, "**", "*unmanaged*"), File.join(dir, "**", "managed_subdir_2")]
end
end
let(:purge_opts) { { recurse: true, whitelist: whitelist } }

describe '#purge!' do
Expand Down Expand Up @@ -214,7 +244,7 @@
context "when class does not implement #purge_exclusions" do
describe '#purge!' do
it 'purges normally' do
expect(FileUtils).to receive(:rm_r).at_least(3).times
expect(FileUtils).to receive(:rm_r).at_least(4).times

subject.purge!(purge_opts)
end
Expand Down

0 comments on commit 9070c6a

Please sign in to comment.