Skip to content

Commit

Permalink
(CODEMGMT-1299) Do not recurse into recursive globbed exclusions when…
Browse files Browse the repository at this point in the history
… purging

When we purge at the environment level we create recurse globs of our
Puppetfile managed directories, eg "<environment>/modules/**/*".

Previous to this patch we would collect a list of all content in the
environment and then match them against this recursive glob using
`File.fnmatch?`. This means we could be doing fnmatches against hundreds
of thousands of then-filtered results which can become prohibitively
slow.

This patch removes the usage of Dir.glob to retrieve all files and
instead manually recurses into child directories if set to recurse and
the child does not match either the purge_exclusions or whitelist globs.
However, because we match using a glob something like
"<environment>/modules/**/*" won't match "<environment>/modules", but
instead will filter each module within "<environment>/modules". In large
deployments this may mean filtering over a thousand child files, but
will prevent us from recursing into any of the modules themselves.

This maintains the post-recursion filtering of "desired_contents" as
this is how git-backed, environment maintained files are cleaned.
  • Loading branch information
justinstoller committed Jul 7, 2021
1 parent 709e040 commit 0e4d7f7
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 14 deletions.
64 changes: 56 additions & 8 deletions lib/r10k/util/purgeable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ module Util
# {#desired_contents}
module Purgeable

# Matches a directory path that contains a directory that starts with a dot '.'.
# Directory must start with a dot and then have a non-dot character, so the *nix
# special directories '.', and '..' are invalid. Matching path must have the
# dot-prepended segment as an interior segment (not at the end of the path).
HIDDEN_SUBPATH = Regexp.new("#{File::SEPARATOR}\\.[^.]+#{File::SEPARATOR}.*")

FN_MATCH_OPTS = File::FNM_PATHNAME | File::FNM_DOTMATCH

# @deprecated
#
# @!method logger
# @abstract Including classes must provide a logger method
# @return [Log4r::Logger]
Expand Down Expand Up @@ -38,24 +48,62 @@ def current_contents(recurse)
end
end

# @deprecated Unused helper function
# @return [Array<String>] Directory contents that are expected but not present
def pending_contents(recurse)
desired_contents - current_contents(recurse)
end

# @return [Array<String>] Directory contents that are present but not expected
def stale_contents(recurse, exclusions, whitelist)
fn_match_opts = File::FNM_PATHNAME | File::FNM_DOTMATCH
def nested_in_a_hidden_dir?(path)
!!(HIDDEN_SUBPATH.match? path)
end

def matches?(test, path)
if test == path
true
elsif File.fnmatch?(test, path, FN_MATCH_OPTS)
true
elsif nested_in_a_hidden_dir?(path)
path.start_with?(test)
else
false
end
end

(current_contents(recurse) - desired_contents).reject do |item|
if exclusion_match = exclusions.find { |ex_item| (ex_item == item) || File.fnmatch?(ex_item, item, fn_match_opts) }
logger.debug2 _("Not purging %{item} due to internal exclusion match: %{exclusion_match}") % {item: item, exclusion_match: exclusion_match}
elsif whitelist_match = whitelist.find { |wl_item| (wl_item == item) || File.fnmatch?(wl_item, item, fn_match_opts) }
logger.debug _("Not purging %{item} due to whitelist match: %{whitelist_match}") % {item: item, whitelist_match: whitelist_match}
def potentially_purgeable(dir, exclusions, whitelist, recurse)
children = Pathname.new(dir).children.reject do |path|
path = path.to_s

if exclusion_match = exclusions.find { |exclusion| matches?(exclusion, path) }
logger.debug2 _("Not purging %{path} due to internal exclusion match: %{exclusion_match}") % {path: path, exclusion_match: exclusion_match}
elsif whitelist_match = whitelist.find { |allowed| matches?(allowed, path) }
logger.debug _("Not purging %{path} due to whitelist match: %{whitelist_match}") % {path: path, whitelist_match: whitelist_match}
end

!!exclusion_match || !!whitelist_match
end

children.flat_map do |child|
if File.directory?(child) && recurse
potentially_purgeable(child, exclusions, whitelist, recurse)
else
child.to_s
end
end
end

# @return [Array<String>] Directory contents that are present but not expected
def stale_contents(recurse, exclusions, whitelist)
dirs = self.managed_directories
desireds = self.desired_contents

initial_purgelist = dirs.flat_map do |dir|
potentially_purgeable(dir, exclusions, whitelist, recurse)
end

initial_purgelist.reject do |path|
desireds.any? { |desired| matches?(desired, path) }
end
end

# Forcibly remove all unmanaged content in `self.managed_directories`
Expand Down
Empty file.
42 changes: 36 additions & 6 deletions spec/unit/util/purgeable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,12 @@

describe '#current_contents' do
it 'collects contents of all managed directories recursively' do
expect(subject.current_contents(recurse)).to contain_exactly(/\/expected_1/, /\/expected_2/, /\/unmanaged_1/, /\/unmanaged_2/, /\/managed_subdir_1/, /\/subdir_expected_1/, /\/subdir_unmanaged_1/)
expect(subject.current_contents(recurse)).
to contain_exactly(/\/expected_1/, /\/expected_2/,
/\/unmanaged_1/, /\/unmanaged_2/,
/\/managed_subdir_1/,
/\/subdir_expected_1/, /\/subdir_unmanaged_1/,
/\/managed_subdir_2/, /\/ignored_1/)
end
end

Expand All @@ -114,7 +119,8 @@
let(:whitelist) { [] }

it 'collects current_contents that should not exist recursively' do
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_1/, /\/unmanaged_2/, /\/subdir_unmanaged_1/)
expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_1/, /\/unmanaged_2/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end
end

Expand All @@ -124,7 +130,17 @@

it 'collects current_contents that should not exist except whitelisted items' do
expect(subject.logger).to receive(:debug).with(/unmanaged_1.*whitelist match/i)
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_2/, /\/subdir_unmanaged_1/)

expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end

it 'does not collect contents that match recursive globbed whitelist items as intermediate values' do
recursive_whitelist = ['**/managed_subdir_1/**/*']
expect(subject.logger).not_to receive(:debug).with(/ignored_1/)

expect(subject.stale_contents(recurse, exclusions, recursive_whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/managed_one\/unmanaged_1/)
end
end

Expand All @@ -134,7 +150,17 @@

it 'collects current_contents that should not exist except excluded items' do
expect(subject.logger).to receive(:debug2).with(/unmanaged_2.*internal exclusion match/i)
expect(subject.stale_contents(recurse, exclusions, whitelist)).to contain_exactly(/\/unmanaged_1/, /\/subdir_unmanaged_1/)

expect(subject.stale_contents(recurse, exclusions, whitelist)).
to contain_exactly(/\/unmanaged_1/, /\/subdir_unmanaged_1/, /\/ignored_1/)
end

it 'does not collect contents that match recursive globbed exclusion items as intermediate values' do
recursive_exclusions = ['**/managed_subdir_1/**/*']
expect(subject.logger).not_to receive(:debug).with(/ignored_1/)

expect(subject.stale_contents(recurse, recursive_exclusions, whitelist)).
to contain_exactly(/\/unmanaged_2/, /\/managed_one\/unmanaged_1/)
end
end
end
Expand Down Expand Up @@ -179,7 +205,11 @@
end

context "recursive whitelist glob" do
let(:whitelist) { managed_directories.collect { |dir| File.join(dir, "**", "*unmanaged*") } }
let(:whitelist) do
managed_directories.flat_map do |dir|
[File.join(dir, "**", "*unmanaged*"), File.join(dir, "**", "managed_subdir_2")]
end
end
let(:purge_opts) { { recurse: true, whitelist: whitelist } }

describe '#purge!' do
Expand Down Expand Up @@ -214,7 +244,7 @@
context "when class does not implement #purge_exclusions" do
describe '#purge!' do
it 'purges normally' do
expect(FileUtils).to receive(:rm_r).at_least(3).times
expect(FileUtils).to receive(:rm_r).at_least(4).times

subject.purge!(purge_opts)
end
Expand Down

0 comments on commit 0e4d7f7

Please sign in to comment.