diff --git a/README.md b/README.md
index cc86625..a8d2358 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ Four very basic primitives:
 
 Slightly more advanced primitives:
 * CheckFile: `check_file '/tmp/do_it'` will wait until the given file exists on the filesystem. The file is cleaned up afterwards.
 * WaitUntil: `wait_until "ping -c 1 google.com"` will wait until the command exits with a 0 status. This primitive supports strings, mixlib/shellout instances, and blocks. One can run the wait_until in the "before" or "cleanup" stage using the options (see code for details).
-* ConsulLock: `consul_lock {path: '/lock/my_app', id: 'my_node', concurrency: 5}` will grab a lock from consul and release it afterwards. This primitive is based on optimistic concurrency rather than consul sessions. It uses a `finish` block to release the lock, ensuring that the release happens after all cleanup blocks. It is also possible to specify the `:datacenter` option to take the lock in another datacenter.
+* ConsulLock: `consul_lock {path: '/lock/my_app', id: 'my_node', concurrency: 5}` will grab a lock from consul and release it afterwards. This primitive is based on optimistic concurrency rather than consul sessions. It uses a `finish` block to release the lock, ensuring that the release happens after all cleanup blocks. It is also possible to specify the `:datacenter` option to take the lock in another datacenter, as well as `:max_wait` to fail the chef run if the lock cannot be acquired within the given number of seconds.
 * ConsulRackLock: `consul_rack_lock {path: '/lock/my_app', id: 'my_node', rack: 'my_rack_id', concurrency: 2}` will grab a lock from consul and release it afterwards. This has the same properties as ConsulLock but will allow a node to enter if another node in the same rack is already under the lock. The concurrency level applies to the number of concurrent racks (not to the number of concurrent nodes per rack).
 * ConsulMaintenance: `consul_maintenance reason: 'My reason', token: 'foo'` will enable maintenance mode on the consul agent before the choregraphie starts. `consul_maintenance service_id: 'consul service_id', reason: 'My reason', token: 'foo'` will enable maintenance mode on the consul service before the choregraphie starts.
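To make the new option concrete, here is a minimal recipe sketch. It is hypothetical: the `choregraphie` block name and the `node['fqdn']` identifier are assumptions, while `consul_lock` and its options come from the README entry above.

```ruby
# Hypothetical usage sketch, not part of this change.
choregraphie 'my_app' do
  consul_lock path: '/lock/my_app',
              id: node['fqdn'],  # assumed node identifier
              concurrency: 5,
              max_wait: 300      # fail the chef run after 5 minutes without the lock
end
```

When `:max_wait` is not set it defaults to `false` (see the library change below), so existing users keep the previous wait-forever behavior.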
diff --git a/libraries/primitive_consul_lock.rb b/libraries/primitive_consul_lock.rb
index 96426fb..bd1e50a 100644
--- a/libraries/primitive_consul_lock.rb
+++ b/libraries/primitive_consul_lock.rb
@@ -6,6 +6,8 @@
 # This primitive is based on optimistic concurrency (using compare-and-swap) rather than consul sessions.
 # It allows the lock to tolerate unavailability of the local consul agent (for reboot, reinstall, ...)
 module Choregraphie
+  class ConsulError < StandardError
+  end
   class ConsulLock < Primitive
     def initialize(options = {}, &block)
       @options = Mash.new(options)
@@ -20,6 +22,7 @@ def initialize(options = {}, &block)
       end
 
       @options[:backoff] ||= 5 # seconds
+      @options[:max_wait] ||= false
 
       ConsulCommon.setup_consul(@options)
 
@@ -66,12 +69,19 @@ def backoff
     def wait_until(action, opts = {})
       dc = "(in #{@options[:datacenter]})" if @options[:datacenter]
       Chef::Log.info "Will #{action} the lock #{path} #{dc}"
+
+      start_time = Time.now
       success = 0.upto(opts[:max_failures] || Float::INFINITY).any? do |tries|
         begin
-          yield || backoff
+          yield
+          true # a clean yield now means success; enter/exit raise on failure
         rescue => e
           Chef::Log.warn "Error while #{action}-ing lock"
           Chef::Log.warn e
+          elapsed = Time.now - start_time
+          if @options[:max_wait] && elapsed > @options[:max_wait]
+            raise "Max time of #{@options[:max_wait]} reached while #{action}-ing lock, failing"
+          end
           backoff
         end
       end
@@ -159,16 +169,14 @@ def enter_lock(opts)
     end
 
     def enter(opts)
-      return true if already_entered?(opts)
+      return if already_entered?(opts)
       if can_enter_lock?(opts)
         enter_lock(opts)
         require 'diplomat'
         result = Diplomat::Kv.put(@path, to_json, cas: @cas, dc: @dc, token: @token)
-        Chef::Log.debug("Someone updated the lock at the same time, will retry") unless result
-        result
+        raise ConsulError, "Someone updated the lock at the same time, will retry" unless result
       else
-        Chef::Log.debug("Too many lock holders (concurrency:#{concurrency})")
-        false
+        raise ConsulError, "Too many lock holders (concurrency:#{concurrency})"
      end
     end
 
@@ -182,10 +190,7 @@ def exit(opts)
         exit_lock(opts)
         require 'diplomat'
         result = Diplomat::Kv.put(@path, to_json, cas: @cas, dc: @dc, token: @token)
-        Chef::Log.debug("Someone updated the lock at the same time, will retry") unless result
-        result
-      else
-        true
+        raise ConsulError, "Someone updated the lock at the same time, will retry" unless result
       end
     end
 
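The retry loop above is the core of the change: failure is now signaled by an exception instead of a falsy return value, and `:max_wait` turns prolonged failure into a hard error that aborts the chef run. A standalone sketch of that pattern in plain Ruby (hypothetical helper name, not the cookbook's API):

```ruby
# Retry a failing action with a fixed backoff, giving up for good once an
# overall deadline has passed. The action signals failure by raising.
def retry_with_deadline(max_wait: nil, backoff: 5)
  start_time = Time.now
  begin
    yield # e.g. a compare-and-swap attempt that raises on conflict
  rescue StandardError => e
    elapsed = Time.now - start_time
    raise "max_wait of #{max_wait}s exceeded: #{e}" if max_wait && elapsed > max_wait
    sleep backoff
    retry
  end
end

# retry_with_deadline(max_wait: 300) { lock.enter(name: 'my_node') }
```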
diff --git a/spec/unit/lock_spec.rb b/spec/unit/lock_spec.rb
index 8a667a6..07e90f7 100644
--- a/spec/unit/lock_spec.rb
+++ b/spec/unit/lock_spec.rb
@@ -73,14 +73,14 @@ def self.debug(_); end
         with(:body => /"holders":{"another_node":12345,"myself":".*"}/).
         to_return(status: 200, body: "true")
 
-      expect(lock.enter(name: 'myself')).to be true
+      expect { lock.enter(name: 'myself') }.to_not raise_error
     end
 
     it 'cannot enter the lock if it has been modified' do
       stub_request(:put, "http://localhost:8500/v1/kv/check-lock/my_lock?cas=42").
         with(:body => /"holders":{"another_node":12345,"myself":".*"}/).
         to_return(status: 200, body: "false")
-      expect(lock.enter(name: 'myself')).to be false
+      expect { lock.enter(name: 'myself') }.to raise_error(Choregraphie::ConsulError)
     end
 
     it 'updates concurrency' do
@@ -88,19 +88,19 @@ def self.debug(_); end
         with(:body => /"concurrency":5,.*"holders":{"another_node":12345,"myself":".*"}/).
         to_return(status: 200, body: "true")
 
-      expect(lock.enter(name: 'myself')).to be true
+      expect { lock.enter(name: 'myself') }.to_not raise_error
     end
   end
 
   context 'when lock is full' do
     it 'cannot take the lock' do
-      expect(full_lock.enter(name: 'myself')).to be false
+      expect { full_lock.enter(name: 'myself') }.to raise_error(Choregraphie::ConsulError)
     end
   end
 
   context 'when lock is already taken' do
     it 'is re-entrant' do
-      expect(full_lock.enter(name: 'another_node')).to be true
+      expect { full_lock.enter(name: 'another_node') }.to_not raise_error
     end
   end
 end
@@ -111,7 +111,7 @@ def self.debug(_); end
         with(body: /{"version":1,"concurrency":5,"holders":{}}/).
         to_return(status: 200, body: "true")
 
-      expect(lock.exit(name: 'another_node')).to be true
+      expect { lock.exit(name: 'another_node') }.to_not raise_error
     end
 
-    it 'return false if it can\'t exit the lock' do
+    it 'raises if it can\'t exit the lock' do
@@ -119,7 +119,7 @@ def self.debug(_); end
       stub_request(:put, "http://localhost:8500/v1/kv/check-lock/my_lock?cas=42").
         with(body: /{"version":1,"concurrency":5,"holders":{}}/).
         to_return(status: 200, body: "false")
-      expect(lock.exit(name: 'another_node')).to be false
+      expect { lock.exit(name: 'another_node') }.to raise_error(Choregraphie::ConsulError)
     end
   end
 end
diff --git a/spec/unit/rack_lock_spec.rb b/spec/unit/rack_lock_spec.rb
index 0434dea..ed9e610 100644
--- a/spec/unit/rack_lock_spec.rb
+++ b/spec/unit/rack_lock_spec.rb
@@ -82,14 +82,14 @@ def self.debug(_); end
         .with(body: /"holders":{"another_rack":{"another_node":12345},"my_rack":{"myself":".*"}/)
         .to_return(status: 200, body: 'true')
 
-      expect(lock.enter(name: 'my_rack', server: 'myself')).to be true
+      expect { lock.enter(name: 'my_rack', server: 'myself') }.not_to raise_error
     end
 
     it 'cannot enter the lock if it has been modified' do
       stub_request(:put, 'http://localhost:8500/v1/kv/check-lock/my_lock?cas=42')
         .with(body: /"holders":{"another_rack":{"another_node":12345},"my_rack":{"myself":".*"}/)
         .to_return(status: 200, body: 'false')
-      expect(lock.enter(name: 'my_rack', server: 'myself')).to be false
+      expect { lock.enter(name: 'my_rack', server: 'myself') }.to raise_error(Choregraphie::ConsulError)
     end
 
     it 'updates concurrency' do
@@ -97,26 +97,26 @@ def self.debug(_); end
         .with(body: /"concurrency":5,"holders":{"another_rack":{"another_node":12345},"my_rack":{"myself":".*"}/)
         .to_return(status: 200, body: 'true')
 
-      expect(lock.enter(name: 'my_rack', server: 'myself')).to be true
+      expect { lock.enter(name: 'my_rack', server: 'myself') }.not_to raise_error
     end
   end
 
   context 'when lock is full' do
     it 'cannot take the lock' do
-      expect(full_lock.enter(name: 'my_rack', server: 'myself')).to be false
+      expect { full_lock.enter(name: 'my_rack', server: 'myself') }.to raise_error(Choregraphie::ConsulError)
     end
   end
 
   context 'when lock is already taken' do
     it 'is re-entrant' do
-      expect(full_lock.enter(name: 'another_rack', server: 'another_node')).to be true
+      expect { full_lock.enter(name: 'another_rack', server: 'another_node') }.not_to raise_error
     end
 
     it 'allows any other node to enter' do
       stub_request(:put, 'http://localhost:8500/v1/kv/check-lock/my_lock?cas=42')
         .with(body: /"holders":{"another_rack":{"another_node":12345,"super_random_name":.*}}/)
         .to_return(status: 200, body: 'true')
-      expect(full_lock.enter(name: 'another_rack', server: 'super_random_name')).to be true
+      expect { full_lock.enter(name: 'another_rack', server: 'super_random_name') }.not_to raise_error
     end
   end
 end
@@ -127,7 +127,7 @@ def self.debug(_); end
         .with(body: /{"version":1,"concurrency":5,"holders":{}}/)
         .to_return(status: 200, body: 'true')
 
-      expect(lock.exit(name: 'another_rack', server: 'another_node')).to be true
+      expect { lock.exit(name: 'another_rack', server: 'another_node') }.not_to raise_error
     end
 
-    it %(return false if it can't exit the lock) do
+    it %(raises if it can't exit the lock) do
@@ -135,7 +135,7 @@ def self.debug(_); end
         .with(body: /{"version":1,"concurrency":5,"holders":{}}/)
         .to_return(status: 200, body: 'false')
 
-      expect(lock.exit(name: 'another_rack', server: 'another_node')).to be false
+      expect { lock.exit(name: 'another_rack', server: 'another_node') }.to raise_error(Choregraphie::ConsulError)
     end
 
     it %(does not exit lock for the whole rack if other nodes from the same rack hold it) do
@@ -143,7 +143,7 @@ def self.debug(_); end
         .with(body: '{"version":1,"concurrency":5,"holders":{"my_rack":{"another_node":123456}}}')
         .to_return(status: 200, body: 'true')
 
-      expect(taken_lock.exit(name: 'my_rack', server: 'myself')).to be true
+      expect { taken_lock.exit(name: 'my_rack', server: 'myself') }.not_to raise_error
     end
   end
 end
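For readers following the stubbed request bodies in these specs: the lock value stored in the consul KV is a small JSON document. A sketch of the two shapes implied by the body regexes above (values are the specs' fixtures, not production data):

```ruby
# ConsulLock value: holders maps a node id to an opaque marker.
lock_value = {
  'version' => 1,
  'concurrency' => 5,
  'holders' => { 'another_node' => 12345 }
}

# ConsulRackLock value: holders is keyed by rack, then by node, which is why
# a second node from a rack already holding the lock may enter freely.
rack_lock_value = {
  'version' => 1,
  'concurrency' => 5, # counts racks, not nodes per rack
  'holders' => { 'another_rack' => { 'another_node' => 12345 } }
}
```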