From a23f0d06e8186b4f2ee334c2beb96739fad118a8 Mon Sep 17 00:00:00 2001 From: Remi Hakim Date: Mon, 6 Apr 2015 09:52:44 -0400 Subject: [PATCH] Improvements to the supervisor integration - Send status OK when we can connect to the daemon - Rename service check - Use proper nose API --- checks.d/supervisord.py | 19 ++++++++--- tests/test_supervisord.py | 69 +++++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/checks.d/supervisord.py b/checks.d/supervisord.py index 293304be86..c07d815a7c 100644 --- a/checks.d/supervisord.py +++ b/checks.d/supervisord.py @@ -35,14 +35,19 @@ FORMAT_TIME = lambda x: time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x)) +SERVER_SERVICE_CHECK = 'supervisord.can_connect' +PROCESS_SERVICE_CHECK = 'supervisord.process.status' + class SupervisordCheck(AgentCheck): def check(self, instance): server_name = instance.get('name') + if not server_name or not server_name.strip(): - raise Exception("Supervisord server name not specified in yaml configuration.") + raise Exception("Supervisor server name not specified in yaml configuration.") + server_service_check_tags = ['%s:%s' % (SERVER_TAG, server_name)] supe = self._connect(instance) count_by_status = defaultdict(int) @@ -79,9 +84,9 @@ def check(self, instance): ' has the right permissions.' % sock if e.errno not in [errno.EACCES, errno.ENOENT]: # permissions denied, no such file - self.service_check('supervisord.server.check', AgentCheck.CRITICAL, - tags=['%s:%s' % (SERVER_TAG, server_name)], - message='Supervisord server %s is down.' % server_name) + self.service_check(SERVER_SERVICE_CHECK, AgentCheck.CRITICAL, + tags=server_service_check_tags, + message='Supervisor server %s is down.' % server_name) raise Exception(msg) except xmlrpclib.ProtocolError, e: @@ -92,6 +97,10 @@ def check(self, instance): raise Exception('An error occurred while connecting to %s: ' '%s %s ' % (server_name, e.errcode, e.errmsg)) + # If we're here, we were able to connect to the server + self.service_check(SERVER_SERVICE_CHECK, AgentCheck.OK, + tags=server_service_check_tags) + # Report service checks and uptime for each process for proc in processes: proc_name = proc['name'] @@ -102,7 +111,7 @@ def check(self, instance): status = DD_STATUS[proc['statename']] msg = self._build_message(proc) count_by_status[status] += 1 - self.service_check('supervisord.process.check', + self.service_check(PROCESS_SERVICE_CHECK, status, tags=tags, message=msg) # Report Uptime uptime = self._extract_uptime(proc) diff --git a/tests/test_supervisord.py b/tests/test_supervisord.py index 65cd789c70..b94a0f830b 100644 --- a/tests/test_supervisord.py +++ b/tests/test_supervisord.py @@ -4,7 +4,6 @@ from mock import patch from nose.plugins.attrib import attr -from nose.tools import ok_, eq_ import unittest import xmlrpclib @@ -38,17 +37,21 @@ class TestSupervisordCheck(unittest.TestCase): }, 'expected_service_checks': { 'server1': [{ + 'status': AgentCheck.OK, + 'tags': ['supervisord_server:server1'], + 'check': 'supervisord.can_connect', + }, { 'status': AgentCheck.OK, 'tags': ['supervisord_server:server1', 'supervisord_process:mysql'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }, { 'status': AgentCheck.CRITICAL, 'tags': ['supervisord_server:server1', 'supervisord_process:java'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }, { 'status': AgentCheck.UNKNOWN, 'tags': ['supervisord_server:server1', 'supervisord_process:python'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }] } }, { @@ -94,18 +97,26 @@ class TestSupervisordCheck(unittest.TestCase): }, 'expected_service_checks': { 'server0': [{ + 'status': AgentCheck.OK, + 'tags': ['supervisord_server:server0'], + 'check': 'supervisord.can_connect', + }, { 'status': AgentCheck.CRITICAL, 'tags': ['supervisord_server:server0', 'supervisord_process:apache2'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }, { 'status': AgentCheck.CRITICAL, 'tags': ['supervisord_server:server0', 'supervisord_process:webapp'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }], 'server1': [{ + 'status': AgentCheck.OK, + 'tags': ['supervisord_server:server1'], + 'check': 'supervisord.can_connect', + }, { 'status': AgentCheck.CRITICAL, 'tags': ['supervisord_server:server1', 'supervisord_process:ruby'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }] } }, { @@ -167,9 +178,13 @@ class TestSupervisordCheck(unittest.TestCase): }, 'expected_service_checks': { 'server0': [{ + 'status': AgentCheck.OK, + 'tags': ['supervisord_server:server0'], + 'check': 'supervisord.can_connect', + }, { 'status': AgentCheck.OK, 'tags': ['supervisord_server:server0', 'supervisord_process:mysql'], - 'check': 'supervisord.process.check' + 'check': 'supervisord.process.status' }] } }] @@ -187,8 +202,8 @@ def test_check(self): """Integration test for supervisord check. Using a mocked supervisord.""" for tc in self.TEST_CASES: check, instances = get_check('supervisord', tc['yaml']) - ok_(check is not None, msg=check) - eq_(tc['expected_instances'], instances) + self.assertTrue(check is not None, msg=check) + self.assertEquals(tc['expected_instances'], instances) for instance in instances: name = instance['name'] @@ -197,9 +212,9 @@ def test_check(self): check.check(instance) except Exception, e: if 'error_message' in tc: # excepted error - eq_(str(e), tc['error_message']) + self.assertEquals(str(e), tc['error_message']) else: - ok_(False, msg=str(e)) + self.assertTrue(False, msg=str(e)) else: # Assert that the check collected the right metrics expected_metrics = tc['expected_metrics'][name] @@ -216,12 +231,12 @@ def test_travis_supervisord(self): # Load yaml config config_str = open(os.environ['VOLATILE_DIR'] + '/supervisor/supervisord.yaml', 'r').read() - ok_(config_str is not None and len(config_str) > 0, msg=config_str) + self.assertTrue(config_str is not None and len(config_str) > 0, msg=config_str) # init the check and get the instances check, instances = get_check('supervisord', config_str) - ok_(check is not None, msg=check) - eq_(len(instances), 1) + self.assertTrue(check is not None, msg=check) + self.assertEquals(len(instances), 1) # Supervisord should run 3 programs for 30, 60 and 90 seconds # respectively. The tests below will ensure that the process count @@ -232,7 +247,7 @@ def test_travis_supervisord(self): check.check(instances[0]) except Exception, e: # Make sure that it ran successfully - ok_(False, msg=str(e)) + self.assertTrue(False, msg=str(e)) else: up, down = 0, 0 for name, timestamp, value, meta in check.get_metrics(): @@ -241,8 +256,8 @@ def test_travis_supervisord(self): up = value elif 'status:down' in meta['tags']: down = value - eq_(up, 3 - i) - eq_(down, i) + self.assertEquals(up, 3 - i) + self.assertEquals(down, i) sleep(10) # Unit Tests ########################################################### @@ -278,7 +293,7 @@ def test_build_message(self): Stop time: \nExit Status: 0""" check, _ = get_check('supervisord', self.TEST_CASES[0]['yaml']) - eq_(expected_message, check._build_message(process)) + self.assertEquals(expected_message, check._build_message(process)) # Helper Methods ####################################################### @@ -286,22 +301,20 @@ def test_build_message(self): def mock_server(url): return MockXmlRcpServer(url) - @staticmethod - def assert_metrics(expected, actual): + def assert_metrics(self, expected, actual): actual = [TestSupervisordCheck.norm_metric(metric) for metric in actual] - eq_(len(actual), len(expected), msg='Invalid # metrics reported.\n' + self.assertEquals(len(actual), len(expected), msg='Invalid # metrics reported.\n' 'Expected: {0}. Found: {1}'.format(len(expected), len(actual))) - ok_(all([expected_metric in actual for expected_metric in expected]), + self.assertTrue(all([expected_metric in actual for expected_metric in expected]), msg='Reported metrics are incorrect.\nExpected: {0}.\n' 'Found: {1}'.format(expected, actual)) - @staticmethod - def assert_service_checks(expected, actual): + def assert_service_checks(self, expected, actual): actual = [TestSupervisordCheck.norm_service_check(service_check) for service_check in actual] - eq_(len(actual), len(expected), msg='Invalid # service checks reported.' - '\nExpected: {0}. Found: {1}.'.format(len(expected), len(actual))) - ok_(all([expected_service_check in actual + self.assertEquals(len(actual), len(expected), msg='Invalid # service checks reported.' + '\nExpected: {0}. Found: {1}.'.format(expected, actual)) + self.assertTrue(all([expected_service_check in actual for expected_service_check in expected]), msg='Reported service checks are incorrect.\nExpected:{0}\n' 'Found:{1}'.format(expected, actual))