diff --git a/.travis.yml b/.travis.yml index 255f90bfa1..c543ca4e39 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 +73,7 @@ env: # hugepages are enabled # - TRAVIS_FLAVOR=tokumx - TRAVIS_FLAVOR=tomcat # JMX testing machine / need the other ones before + - TRAVIS_FLAVOR=zookeeper # Override travis defaults with empty jobs before_install: echo "OVERRIDING TRAVIS STEPS" diff --git a/Rakefile b/Rakefile index d0c294d300..eb903cc863 100755 --- a/Rakefile +++ b/Rakefile @@ -30,6 +30,7 @@ require './ci/supervisord' require './ci/sysstat' require './ci/tokumx' require './ci/tomcat' +require './ci/zookeeper' CLOBBER.include '**/*.pyc' diff --git a/checks.d/zk.py b/checks.d/zk.py index 3c9d7ca1a8..260a23d111 100644 --- a/checks.d/zk.py +++ b/checks.d/zk.py @@ -32,6 +32,7 @@ # project from checks import AgentCheck + class ZKConnectionFailure(Exception): """ Raised when we are unable to connect or get the output of a command. """ pass @@ -73,6 +74,7 @@ def check(self, instance): stat_out = self._send_command('stat', *cx_args) except ZKConnectionFailure: self.increment('zookeeper.timeouts') + raise else: # Parse the response metrics, new_tags, mode = self.parse_stat(stat_out) @@ -87,7 +89,8 @@ def check(self, instance): message = u"Server is in %s mode" % mode else: status = AgentCheck.CRITICAL - message = u"Server is in %s mode but check expects %s mode" % (expected_mode, mode) + message = u"Server is in %s mode but check expects %s mode"\ + % (mode, expected_mode) self.service_check('zookeeper.mode', status, message=message) def _send_command(self, command, host, port, timeout): @@ -110,7 +113,8 @@ def _send_command(self, command, host, port, timeout): while chunk: if num_reads > max_reads: # Safeguard against an infinite loop - raise Exception("Read %s bytes before exceeding max reads of %s. " % (buf.tell(), max_reads)) + raise Exception("Read %s bytes before exceeding max reads of %s. " + % (buf.tell(), max_reads)) chunk = sock.recv(chunk_size) buf.write(chunk) num_reads += 1 diff --git a/ci/resources/zookeeper/zoo.cfg b/ci/resources/zookeeper/zoo.cfg new file mode 100644 index 0000000000..938574d0ff --- /dev/null +++ b/ci/resources/zookeeper/zoo.cfg @@ -0,0 +1,3 @@ +tickTime=2000 +dataDir=VOLATILE_DIR/zookeeper +clientPort=2181 diff --git a/ci/zookeeper.rb b/ci/zookeeper.rb new file mode 100644 index 0000000000..0a25c627b1 --- /dev/null +++ b/ci/zookeeper.rb @@ -0,0 +1,72 @@ +require './ci/common' + +def zk_version + ENV['FLAVOR_VERSION'] || '3.4.6' +end + +def zk_rootdir + "#{ENV['INTEGRATIONS_DIR']}/zk_#{zk_version}" +end + +namespace :ci do + namespace :zookeeper do |flavor| + task :before_install => ['ci:common:before_install'] + + task :install => ['ci:common:install'] do + unless Dir.exist? File.expand_path(zk_rootdir) + sh %(curl -s -L\ + -o $VOLATILE_DIR/zookeeper-#{zk_version}.tar.gz\ + http://mirror.cogentco.com/pub/apache/zookeeper/zookeeper-#{zk_version}/zookeeper-#{zk_version}.tar.gz) + sh %(mkdir -p #{zk_rootdir}) + sh %(tar zxf $VOLATILE_DIR/zookeeper-#{zk_version}.tar.gz\ + -C #{zk_rootdir} --strip-components=1) + end + end + + task :before_script => ['ci:common:before_script'] do + sh %(mkdir -p $VOLATILE_DIR/zookeeper) + sh %(cp $TRAVIS_BUILD_DIR/ci/resources/zookeeper/zoo.cfg\ + #{zk_rootdir}/conf/) + sh %(#{zk_rootdir}/bin/zkServer.sh start) + end + + task :script => ['ci:common:script'] do + this_provides = [ + 'zookeeper' + ] + Rake::Task['ci:common:run_tests'].invoke(this_provides) + end + + task :before_cache => ['ci:common:before_cache'] + + task :cache => ['ci:common:cache'] + + task :cleanup => ['ci:common:cleanup'] do + sh %(#{zk_rootdir}/bin/zkServer.sh stop) + end + + task :execute do + exception = nil + begin + %w(before_install install before_script script).each do |t| + Rake::Task["#{flavor.scope.path}:#{t}"].invoke + end + rescue => e + exception = e + puts "Failed task: #{e.class} #{e.message}".red + end + if ENV['SKIP_CLEANUP'] + puts 'Skipping cleanup, disposable environments are great'.yellow + else + puts 'Cleaning up' + Rake::Task["#{flavor.scope.path}:cleanup"].invoke + end + if ENV['TRAVIS'] + %w(before_cache cache).each do |t| + Rake::Task["#{flavor.scope.path}:#{t}"].invoke + end + end + fail exception if exception + end + end +end diff --git a/tests/test_zookeeper.py b/tests/test_zookeeper.py index 9242d510fe..db6a003dec 100644 --- a/tests/test_zookeeper.py +++ b/tests/test_zookeeper.py @@ -1,137 +1,94 @@ # stdlib -import os -from StringIO import StringIO -import unittest - -# 3p -from mock import patch +from nose.plugins.attrib import attr # project from checks import AgentCheck -from tests.common import get_check - -CONFIG = """ -init_config: - -instances: - - host: 127.0.0.1 - port: 2181 - expected_mode: follower - tags: [] -""" - -CONFIG2 = """ -init_config: - -instances: - - host: 127.0.0.1 - port: 2182 - tags: [] - expected_mode: leader -""" - -def send_command_lt_v344(cmd, *args): - if cmd == 'stat': - return StringIO("""Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT - Clients: - /10.42.114.160:32634[1](queued=0,recved=12,sent=0) - /10.37.137.74:21873[1](queued=0,recved=53613,sent=0) - /10.37.137.74:21876[1](queued=0,recved=57436,sent=0) - /10.115.77.32:32990[1](queued=0,recved=16,sent=0) - /10.37.137.74:21891[1](queued=0,recved=55011,sent=0) - /10.37.137.74:21797[1](queued=0,recved=19431,sent=0) - -Latency min/avg/max: -10/0/20007 -Received: 101032173 -Sent: 0 -Outstanding: 0 -Zxid: 0x1034799c7 -Mode: leader -Node count: 487 -""") - else: - return StringIO() - -def send_command_gte_v344(cmd, *args): - if cmd == 'stat': - return StringIO("""Zookeeper version: 3.4.5--1, built on 03/16/2010 07:31 GMT -Clients: - /10.42.114.160:32634[1](queued=0,recved=12,sent=0) - /10.37.137.74:21873[1](queued=0,recved=53613,sent=0) - /10.37.137.74:21876[1](queued=0,recved=57436,sent=0) - /10.115.77.32:32990[1](queued=0,recved=16,sent=0) - /10.37.137.74:21891[1](queued=0,recved=55011,sent=0) - /10.37.137.74:21797[1](queued=0,recved=19431,sent=0) - -Latency min/avg/max: -10/0/20007 -Received: 101032173 -Sent: 0 -Connections: 1 -Outstanding: 0 -Zxid: 0x1034799c7 -Mode: leader -Node count: 487 -""") - else: - return StringIO() - -class TestZookeeper(unittest.TestCase): - def test_zk_stat_parsing_lt_v344(self): - zk, instances = get_check('zk', CONFIG) - expected = [ - ('zookeeper.latency.min', -10), - ('zookeeper.latency.avg', 0), - ('zookeeper.latency.max', 20007), - ('zookeeper.bytes_received', 101032173L), - ('zookeeper.bytes_sent', 0L), - ('zookeeper.connections', 6), - ('zookeeper.bytes_outstanding', 0L), - ('zookeeper.outstanding_requests', 0L), - ('zookeeper.zxid.epoch', 1), - ('zookeeper.zxid.count', 55024071), - ('zookeeper.nodes', 487L), - ] - - - with patch.object(zk, '_send_command', send_command_lt_v344): - zk.check(instances[0]) - - service_checks = zk.get_service_checks() - self.assertEquals(service_checks[0]['check'], 'zookeeper.ruok') - self.assertEquals(service_checks[1]['check'], 'zookeeper.mode') - self.assertEquals(service_checks[1]['status'], AgentCheck.CRITICAL) - - metrics = zk.get_metrics() - self.assertEquals(sorted([(name, val) for name, _, val, _ in metrics]), sorted(expected)) - self.assertEquals(len(service_checks), 2) - self.assertEquals(metrics[0][3]['tags'], ['mode:leader']) - - def test_zk_stat_parsing_gte_v344(self): - zk, instances = get_check('zk', CONFIG2) - expected = [ - ('zookeeper.latency.min', -10), - ('zookeeper.latency.avg', 0), - ('zookeeper.latency.max', 20007), - ('zookeeper.bytes_received', 101032173L), - ('zookeeper.bytes_sent', 0L), - ('zookeeper.connections', 1), - ('zookeeper.bytes_outstanding', 0L), - ('zookeeper.outstanding_requests', 0L), - ('zookeeper.zxid.epoch', 1), - ('zookeeper.zxid.count', 55024071), - ('zookeeper.nodes', 487L), - ] - - - with patch.object(zk, '_send_command', send_command_gte_v344): - zk.check(instances[0]) - service_checks = zk.get_service_checks() - self.assertEquals(len(service_checks), 2) - self.assertEquals(service_checks[0]['check'], 'zookeeper.ruok') - self.assertEquals(service_checks[0]['status'], AgentCheck.WARNING) - self.assertEquals(service_checks[1]['check'], 'zookeeper.mode') - self.assertEquals(service_checks[1]['status'], AgentCheck.OK) - - metrics = zk.get_metrics() - self.assertEquals(metrics[0][3]['tags'], ['mode:leader']) - self.assertEquals(sorted([(name, val) for name, _, val, _ in metrics]), sorted(expected)) +from tests.common import AgentCheckTest + + +@attr(requires='zookeeper') +class ZooKeeperTestCase(AgentCheckTest): + CHECK_NAME = 'zk' + + CONFIG = { + 'host': "127.0.0.1", + 'port': 2181, + 'expected_mode': "standalone", + 'tags': ["mytag"] + } + + WRONG_EXPECTED_MODE = { + 'host': "127.0.0.1", + 'port': 2181, + 'expected_mode': "follower", + 'tags': [] + } + + CONNECTION_FAILURE_CONFIG = { + 'host': "127.0.0.1", + 'port': 2182, + 'expected_mode': "follower", + 'tags': [] + } + + METRICS = [ + 'zookeeper.latency.min', + 'zookeeper.latency.avg', + 'zookeeper.latency.max', + 'zookeeper.bytes_received', + 'zookeeper.bytes_sent', + 'zookeeper.connections', + 'zookeeper.connections', + 'zookeeper.bytes_outstanding', + 'zookeeper.outstanding_requests', + 'zookeeper.zxid.epoch', + 'zookeeper.zxid.count', + 'zookeeper.nodes', + ] + + def test_check(self): + """ + Collect ZooKeeper metrics. + """ + config = { + 'instances': [self.CONFIG] + } + self.run_check(config) + + # Test metrics + for mname in self.METRICS: + self.assertMetric(mname, tags=["mode:standalone", "mytag"], count=1) + + # Test service checks + self.assertServiceCheck("zookeeper.ruok", status=AgentCheck.OK) + self.assertServiceCheck("zookeeper.mode", status=AgentCheck.OK) + + self.coverage_report() + + def test_wrong_expected_mode(self): + """ + Raise a 'critical' service check when ZooKeeper is not in the expected mode + """ + config = { + 'instances': [self.WRONG_EXPECTED_MODE] + } + self.run_check(config) + + # Test service checks + self.assertServiceCheck("zookeeper.mode", status=AgentCheck.CRITICAL) + + def test_error_state(self): + """ + Raise a 'critical' service check when ZooKeeper is in an error state + """ + config = { + 'instances': [self.CONNECTION_FAILURE_CONFIG] + } + + self.assertRaises( + Exception, + lambda: self.run_check(config) + ) + + # Test service checks + self.assertServiceCheck("zookeeper.ruok", status=AgentCheck.CRITICAL)