Skip to content

Commit

Permalink
Merge pull request #1081 from Magisus/concurrent-cache
Browse files Browse the repository at this point in the history
[#1058] Avoid concurrent cache updates when downloading modules on multiple threads
  • Loading branch information
mwaggett authored Jul 30, 2020
2 parents 617f137 + fbc8461 commit 48ac619
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 19 deletions.
8 changes: 3 additions & 5 deletions CHANGELOG.mkd
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@ CHANGELOG

Unreleased
----------
## Changes

- Add filter_command configuration option for git repositories. (Thanks to [mhumpula](https://github.com/mhumpula) for the feature.) [#823](https://github.com/puppetlabs/r10k/pull/823)
- Increase default pool_size to 4, allowing modules to be downloaded on 4 threads concurrently. [#1038](https://github.com/puppetlabs/r10k/issues/1038)
- Ensure that modules that share a cachedir download serially, to avoid cache corruption. [#1058](https://github.com/puppetlabs/r10k/issues/1058)

3.5.2
-----
Expand Down
6 changes: 3 additions & 3 deletions doc/dynamic-environments/configuration.mkd
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ The proxy server being used will be logged at the "debug" level when r10k runs.
### pool_size

The pool_size setting is a number that determines how many threads should be spawned
while updating modules. The default value is 4, which means modules will be updated
in parallel. If this causes issues, change this setting to 1 to cause modules to be
updated serially.

### git

Expand Down
68 changes: 68 additions & 0 deletions integration/tests/git_source/git_source_repeated_remote.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
require 'git_utils'
require 'r10k_utils'
require 'master_manipulator'
test_name 'Verify the same remote can be used in more than one object'

# Paths and fixtures on the master under test.
env_path = on(master, puppet('config print environmentpath')).stdout.rstrip
r10k_fqp = get_r10k_fqp(master)
git_environments_path = '/root/environments'
git_repo_path = '/git_repos'
git_repo_name = 'environments'
git_control_remote = File.join(git_repo_path, "#{git_repo_name}.git")
code_dir = "#{env_path}/production"

last_commit = git_last_commit(master, git_environments_path)
git_provider = ENV['GIT_PROVIDER']
r10k_config_path = get_r10k_config_file_path(master)
r10k_config_bak_path = "#{r10k_config_path}.bak"

# In-line files.
# NOTE(review): r10k's config is YAML, so 'provider' must be nested under
# 'git:' and the source attributes under the source name — the flattened
# form was invalid and has been re-indented.
r10k_conf = <<-CONF
cachedir: '/var/cache/r10k'
git:
  provider: '#{git_provider}'
sources:
  control:
    basedir: "#{env_path}"
    remote: "#{git_control_remote}"
CONF

# Install the same module (same git remote, hence same r10k cache) in two
# different places, which exercises the shared-cachedir serialization path.
# NOTE(review): restored 'github.com' in the URLs — the originals carried
# link-obfuscation residue ('github.com') from the page dump.
puppetfile = <<-EOS
mod 'prod_apache',
  :git => 'git://github.com/puppetlabs/puppetlabs-apache.git',
  :branch => 'master'
mod 'test_apache',
  :git => 'git://github.com/puppetlabs/puppetlabs-apache.git',
  :branch => 'master'
EOS

teardown do
  step 'Restore Original "r10k" Config'
  on(master, "mv #{r10k_config_bak_path} #{r10k_config_path}")

  clean_up_r10k(master, last_commit, git_environments_path)
end

step 'Stub the forge'
stub_forge_on(master)

step 'Backup Current "r10k" Config'
on(master, "mv #{r10k_config_path} #{r10k_config_bak_path}")

step 'Update the "r10k" Config'
create_remote_file(master, r10k_config_path, r10k_conf)

step 'Ask r10k to deploy'
on(master, "#{r10k_fqp} deploy environment -p")

step 'Add puppetfile with repeated remote'
create_remote_file(master, "#{git_environments_path}/Puppetfile", puppetfile)
git_add_commit_push(master, 'production', 'add Puppetfile', git_environments_path)

step 'Deploy r10k'
on(master, "#{r10k_fqp} deploy environment -p")

# Both modules point at the same remote and therefore share one git cache;
# each must still end up with its own working copy.
step 'Verify module was installed in both places'
on(master, "test -d #{code_dir}/modules/prod_apache")
on(master, "test -d #{code_dir}/modules/test_apache")
4 changes: 1 addition & 3 deletions lib/r10k/git/cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,8 @@ def reset!

alias cached? exist?

private

# Reformat the remote name into something that can be used as a directory
# name: every character other than '@', word characters, '.' and '-' is
# replaced with '-'. The result is memoized — @remote never changes after
# construction, and this is now called once per module when grouping
# modules by cache location, so recomputing the gsub would be wasted work.
#
# NOTE(review): the diff residue left a duplicated, non-memoized gsub line
# whose result was discarded; only the memoized form is kept.
#
# @return [String] filesystem-safe directory name derived from @remote
def sanitized_dirname
  @sanitized_dirname ||= @remote.gsub(/[^@\w\.-]/, '-')
end
end
4 changes: 4 additions & 0 deletions lib/r10k/git/stateful_repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ class R10K::Git::StatefulRepository
# @api private
attr_reader :repo

# @!attribute [r] cache
# @api private
attr_reader :cache

extend Forwardable
def_delegators :@repo, :head, :tracked_paths

Expand Down
8 changes: 8 additions & 0 deletions lib/r10k/module/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ def properties
raise NotImplementedError
end

# Return the module's cachedir. Subclasses that implement a cache
# will override this to return a real directory location; this base
# implementation returns :none, the marker the Puppetfile uses to
# group modules that have no shared cache and can therefore be
# updated concurrently.
#
# @return [String, :none]
def cachedir
:none
end

private

def parse_title(title)
Expand Down
4 changes: 4 additions & 0 deletions lib/r10k/module/git.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ def status
@repo.status(version)
end

# Identifier of the git cache directory backing this module's repository.
# Modules that return the same value share an on-disk cache, which the
# Puppetfile uses to schedule their updates serially.
def cachedir
  repo_cache = @repo.cache
  repo_cache.sanitized_dirname
end

private

def validate_ref(desired, default)
Expand Down
22 changes: 19 additions & 3 deletions lib/r10k/puppetfile.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Puppetfile

include R10K::Settings::Mixin

def_setting_attr :pool_size, 1
def_setting_attr :pool_size, 4

include R10K::Logging

Expand Down Expand Up @@ -42,6 +42,11 @@ class Puppetfile
# @return [Boolean] Overwrite any locally made changes
attr_accessor :force

# @!attribute [r] modules_by_vcs_cachedir
# @api private Only exposed for testing purposes
# @return [Hash{:none, String => Array<R10K::Module>}]
attr_reader :modules_by_vcs_cachedir

# @param [String] basedir
# @param [String] moduledir The directory to install the modules, default to #{basedir}/modules
# @param [String] puppetfile_path The path to the Puppetfile, default to #{basedir}/Puppetfile
Expand All @@ -58,6 +63,7 @@ def initialize(basedir, moduledir = nil, puppetfile_path = nil, puppetfile_name

@modules = []
@managed_content = {}
@modules_by_vcs_cachedir = {}
@forge = 'forgeapi.puppetlabs.com'

@loaded = false
Expand Down Expand Up @@ -137,6 +143,9 @@ def add_module(name, args)
mod.origin = 'Puppetfile'

@managed_content[install_path] << mod.name
cachedir = mod.cachedir
@modules_by_vcs_cachedir[cachedir] ||= []
@modules_by_vcs_cachedir[cachedir] << mod
@modules << mod
end

Expand Down Expand Up @@ -212,15 +221,22 @@ def concurrent_accept(visitor, pool_size)
# Build the queue of work units consumed by the visitor threads.
#
# Modules that share a VCS cachedir must be updated serially to avoid
# corrupting the cache, so each queue entry is an Array of modules:
# modules without a shared cache (:none) are enqueued one per entry and
# may run on any thread, while all modules sharing a cachedir are
# enqueued together as a single entry so one thread processes them in
# order.
#
# @param visitor [#visit] visitor whose :puppetfile visit wraps queue population
# @return [Queue] queue of Array<R10K::Module> work units
def modules_queue(visitor)
  Queue.new.tap do |queue|
    visitor.visit(:puppetfile, self) do
      # Shallow clone so the delete below doesn't mutate the attr hash.
      grouped = modules_by_vcs_cachedir.clone
      independent = grouped.delete(:none) || []

      # Use a literal one-element array rather than Array(mod): Array()
      # calls #to_a on its argument, which would silently split any
      # module object that happens to implement it.
      independent.each { |mod| queue << [mod] }
      grouped.each_value { |mods| queue << mods }
    end
  end
end
public :modules_queue

def visitor_thread(visitor, mods_queue)
Thread.new do
begin
while mod = mods_queue.pop(true) do mod.accept(visitor) end
while mods = mods_queue.pop(true) do
mods.each {|mod| mod.accept(visitor) }
end
rescue ThreadError => e
logger.debug _("Module thread %{id} exiting: %{message}") % {message: e.message, id: Thread.current.object_id}
Thread.exit
Expand Down
2 changes: 1 addition & 1 deletion lib/r10k/settings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def self.global_settings

Definition.new(:pool_size, {
:desc => "The amount of threads used to concurrently install modules. The default value is 4: install up to four modules at a time.",
:default => 4,
:default => 4,
:validate => lambda do |value|
if !value.is_a?(Integer)
raise ArgumentError, "The pool_size setting should be an integer, not a #{value.class}"
Expand Down
4 changes: 2 additions & 2 deletions spec/unit/action/deploy/module_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@

before do
allow(subject).to receive(:visit_environment).and_wrap_original do |original, environment, &block|
expect(environment.puppetfile).to receive(:modules).and_return(
[R10K::Module::Local.new(environment.name, '/fakedir', [], environment)]
expect(environment.puppetfile).to receive(:modules_by_vcs_cachedir).and_return(
{none: [R10K::Module::Local.new(environment.name, '/fakedir', [], environment)]}
)
original.call(environment, &block)
end
Expand Down
1 change: 1 addition & 0 deletions spec/unit/action/puppetfile/install_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def installer(opts = {}, argv = [], settings = {})
before do
allow(puppetfile).to receive(:purge!)
allow(puppetfile).to receive(:modules).and_return(modules)
allow(puppetfile).to receive(:modules_by_vcs_cachedir).and_return({none: modules})
end

it "syncs each module in the Puppetfile" do
Expand Down
49 changes: 47 additions & 2 deletions spec/unit/puppetfile_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,25 @@

expect { subject.add_module('puppet/test_module', module_opts) }.to raise_error(R10K::Error, /cannot manage content.*is not within/i).and not_change { subject.modules }
end

it "groups modules by vcs cache location" do
module_opts = { install_path: File.join(subject.basedir, 'vendor') }
opts1 = module_opts.merge(git: 'git@example.com:puppet/test_module.git')
opts2 = module_opts.merge(git: 'git@example.com:puppet/test_module_c.git')
sanitized_name1 = "git@example.com-puppet-test_module.git"
sanitized_name2 = "git@example.com-puppet-test_module_c.git"

subject.add_module('puppet/test_module_a', opts1)
subject.add_module('puppet/test_module_b', opts1)
subject.add_module('puppet/test_module_c', opts2)
subject.add_module('puppet/test_module_d', '1.2.3')

mods_by_cachedir = subject.modules_by_vcs_cachedir

expect(mods_by_cachedir[:none].length).to be 1
expect(mods_by_cachedir[sanitized_name1].length).to be 2
expect(mods_by_cachedir[sanitized_name2].length).to be 1
end
end

describe "#purge_exclusions" do
Expand Down Expand Up @@ -268,7 +287,7 @@ def expect_wrapped_error(orig, pf_path, wrapped_error)
mod2 = spy('module')
expect(mod2).to receive(:accept).with(visitor)

expect(subject).to receive(:modules).and_return([mod1, mod2])
expect(subject).to receive(:modules_by_vcs_cachedir).and_return({none: [mod1, mod2]})
subject.accept(visitor)
end

Expand All @@ -289,12 +308,38 @@ def expect_wrapped_error(orig, pf_path, wrapped_error)
mod2 = spy('module')
expect(mod2).to receive(:accept).with(visitor)

expect(subject).to receive(:modules).and_return([mod1, mod2])
expect(subject).to receive(:modules_by_vcs_cachedir).and_return({none: [mod1, mod2]})

expect(Thread).to receive(:new).exactly(pool_size).and_call_original
expect(Queue).to receive(:new).and_call_original

subject.accept(visitor)
end

it "Creates queues of modules grouped by cachedir" do
visitor = spy('visitor')
expect(visitor).to receive(:visit) do |type, other, &block|
expect(type).to eq :puppetfile
expect(other).to eq subject
block.call
end

mod1 = spy('module1')
mod2 = spy('module2')
mod3 = spy('module3')
mod4 = spy('module4')
mod5 = spy('module5')
mod6 = spy('module6')

expect(subject).to receive(:modules_by_vcs_cachedir)
.and_return({:none => [mod1, mod2],
"foo-cachedir" => [mod3, mod4],
"bar-cachedir" => [mod5, mod6]})

queue = subject.modules_queue(visitor)
expect(queue.length).to be 4
queue_array = 4.times.map { queue.pop }
expect(queue_array).to match_array([[mod1], [mod2], [mod3, mod4], [mod5, mod6]])
end
end
end

0 comments on commit 48ac619

Please sign in to comment.