Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to the supervisor integration #1511

Merged
merged 1 commit into from
Apr 6, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions checks.d/supervisord.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,19 @@

FORMAT_TIME = lambda x: time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x))

SERVER_SERVICE_CHECK = 'supervisord.can_connect'
PROCESS_SERVICE_CHECK = 'supervisord.process.status'

class SupervisordCheck(AgentCheck):

def check(self, instance):
server_name = instance.get('name')


if not server_name or not server_name.strip():
raise Exception("Supervisord server name not specified in yaml configuration.")
raise Exception("Supervisor server name not specified in yaml configuration.")

server_service_check_tags = ['%s:%s' % (SERVER_TAG, server_name)]
supe = self._connect(instance)
count_by_status = defaultdict(int)

Expand Down Expand Up @@ -79,9 +84,9 @@ def check(self, instance):
' has the right permissions.' % sock

if e.errno not in [errno.EACCES, errno.ENOENT]: # permissions denied, no such file
self.service_check('supervisord.server.check', AgentCheck.CRITICAL,
tags=['%s:%s' % (SERVER_TAG, server_name)],
message='Supervisord server %s is down.' % server_name)
self.service_check(SERVER_SERVICE_CHECK, AgentCheck.CRITICAL,
tags=server_service_check_tags,
message='Supervisor server %s is down.' % server_name)

raise Exception(msg)
except xmlrpclib.ProtocolError, e:
Expand All @@ -92,6 +97,10 @@ def check(self, instance):
raise Exception('An error occurred while connecting to %s: '
'%s %s ' % (server_name, e.errcode, e.errmsg))

# If we're here, we were able to connect to the server
self.service_check(SERVER_SERVICE_CHECK, AgentCheck.OK,
tags=server_service_check_tags)

# Report service checks and uptime for each process
for proc in processes:
proc_name = proc['name']
Expand All @@ -102,7 +111,7 @@ def check(self, instance):
status = DD_STATUS[proc['statename']]
msg = self._build_message(proc)
count_by_status[status] += 1
self.service_check('supervisord.process.check',
self.service_check(PROCESS_SERVICE_CHECK,
status, tags=tags, message=msg)
# Report Uptime
uptime = self._extract_uptime(proc)
Expand Down
69 changes: 41 additions & 28 deletions tests/test_supervisord.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from mock import patch
from nose.plugins.attrib import attr
from nose.tools import ok_, eq_
import unittest
import xmlrpclib

Expand Down Expand Up @@ -38,17 +37,21 @@ class TestSupervisordCheck(unittest.TestCase):
},
'expected_service_checks': {
'server1': [{
'status': AgentCheck.OK,
'tags': ['supervisord_server:server1'],
'check': 'supervisord.can_connect',
}, {
'status': AgentCheck.OK,
'tags': ['supervisord_server:server1', 'supervisord_process:mysql'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}, {
'status': AgentCheck.CRITICAL,
'tags': ['supervisord_server:server1', 'supervisord_process:java'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}, {
'status': AgentCheck.UNKNOWN,
'tags': ['supervisord_server:server1', 'supervisord_process:python'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}]
}
}, {
Expand Down Expand Up @@ -94,18 +97,26 @@ class TestSupervisordCheck(unittest.TestCase):
},
'expected_service_checks': {
'server0': [{
'status': AgentCheck.OK,
'tags': ['supervisord_server:server0'],
'check': 'supervisord.can_connect',
}, {
'status': AgentCheck.CRITICAL,
'tags': ['supervisord_server:server0', 'supervisord_process:apache2'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}, {
'status': AgentCheck.CRITICAL,
'tags': ['supervisord_server:server0', 'supervisord_process:webapp'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}],
'server1': [{
'status': AgentCheck.OK,
'tags': ['supervisord_server:server1'],
'check': 'supervisord.can_connect',
}, {
'status': AgentCheck.CRITICAL,
'tags': ['supervisord_server:server1', 'supervisord_process:ruby'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}]
}
}, {
Expand Down Expand Up @@ -167,9 +178,13 @@ class TestSupervisordCheck(unittest.TestCase):
},
'expected_service_checks': {
'server0': [{
'status': AgentCheck.OK,
'tags': ['supervisord_server:server0'],
'check': 'supervisord.can_connect',
}, {
'status': AgentCheck.OK,
'tags': ['supervisord_server:server0', 'supervisord_process:mysql'],
'check': 'supervisord.process.check'
'check': 'supervisord.process.status'
}]
}
}]
Expand All @@ -187,8 +202,8 @@ def test_check(self):
"""Integration test for supervisord check. Using a mocked supervisord."""
for tc in self.TEST_CASES:
check, instances = get_check('supervisord', tc['yaml'])
ok_(check is not None, msg=check)
eq_(tc['expected_instances'], instances)
self.assertTrue(check is not None, msg=check)
self.assertEquals(tc['expected_instances'], instances)
for instance in instances:
name = instance['name']

Expand All @@ -197,9 +212,9 @@ def test_check(self):
check.check(instance)
except Exception, e:
if 'error_message' in tc: # excepted error
eq_(str(e), tc['error_message'])
self.assertEquals(str(e), tc['error_message'])
else:
ok_(False, msg=str(e))
self.assertTrue(False, msg=str(e))
else:
# Assert that the check collected the right metrics
expected_metrics = tc['expected_metrics'][name]
Expand All @@ -216,12 +231,12 @@ def test_travis_supervisord(self):

# Load yaml config
config_str = open(os.environ['VOLATILE_DIR'] + '/supervisor/supervisord.yaml', 'r').read()
ok_(config_str is not None and len(config_str) > 0, msg=config_str)
self.assertTrue(config_str is not None and len(config_str) > 0, msg=config_str)

# init the check and get the instances
check, instances = get_check('supervisord', config_str)
ok_(check is not None, msg=check)
eq_(len(instances), 1)
self.assertTrue(check is not None, msg=check)
self.assertEquals(len(instances), 1)

# Supervisord should run 3 programs for 30, 60 and 90 seconds
# respectively. The tests below will ensure that the process count
Expand All @@ -232,7 +247,7 @@ def test_travis_supervisord(self):
check.check(instances[0])
except Exception, e:
# Make sure that it ran successfully
ok_(False, msg=str(e))
self.assertTrue(False, msg=str(e))
else:
up, down = 0, 0
for name, timestamp, value, meta in check.get_metrics():
Expand All @@ -241,8 +256,8 @@ def test_travis_supervisord(self):
up = value
elif 'status:down' in meta['tags']:
down = value
eq_(up, 3 - i)
eq_(down, i)
self.assertEquals(up, 3 - i)
self.assertEquals(down, i)
sleep(10)

# Unit Tests ###########################################################
Expand Down Expand Up @@ -278,30 +293,28 @@ def test_build_message(self):
Stop time: \nExit Status: 0"""

check, _ = get_check('supervisord', self.TEST_CASES[0]['yaml'])
eq_(expected_message, check._build_message(process))
self.assertEquals(expected_message, check._build_message(process))

# Helper Methods #######################################################

@staticmethod
def mock_server(url):
return MockXmlRcpServer(url)

@staticmethod
def assert_metrics(expected, actual):
def assert_metrics(self, expected, actual):
actual = [TestSupervisordCheck.norm_metric(metric) for metric in actual]
eq_(len(actual), len(expected), msg='Invalid # metrics reported.\n'
self.assertEquals(len(actual), len(expected), msg='Invalid # metrics reported.\n'
'Expected: {0}. Found: {1}'.format(len(expected), len(actual)))
ok_(all([expected_metric in actual for expected_metric in expected]),
self.assertTrue(all([expected_metric in actual for expected_metric in expected]),
msg='Reported metrics are incorrect.\nExpected: {0}.\n'
'Found: {1}'.format(expected, actual))

@staticmethod
def assert_service_checks(expected, actual):
def assert_service_checks(self, expected, actual):
actual = [TestSupervisordCheck.norm_service_check(service_check)
for service_check in actual]
eq_(len(actual), len(expected), msg='Invalid # service checks reported.'
'\nExpected: {0}. Found: {1}.'.format(len(expected), len(actual)))
ok_(all([expected_service_check in actual
self.assertEquals(len(actual), len(expected), msg='Invalid # service checks reported.'
'\nExpected: {0}. Found: {1}.'.format(expected, actual))
self.assertTrue(all([expected_service_check in actual
for expected_service_check in expected]),
msg='Reported service checks are incorrect.\nExpected:{0}\n'
'Found:{1}'.format(expected, actual))
Expand Down