From a23f0d06e8186b4f2ee334c2beb96739fad118a8 Mon Sep 17 00:00:00 2001
From: Remi Hakim <remi@datadoghq.com>
Date: Mon, 6 Apr 2015 09:52:44 -0400
Subject: [PATCH] Improvements to the supervisor integration

- Send status OK when we can connect to the daemon
- Rename service check
- Use proper nose API
---
 checks.d/supervisord.py   | 19 ++++++++---
 tests/test_supervisord.py | 69 +++++++++++++++++++++++----------------
 2 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/checks.d/supervisord.py b/checks.d/supervisord.py
index 293304be86..c07d815a7c 100644
--- a/checks.d/supervisord.py
+++ b/checks.d/supervisord.py
@@ -35,14 +35,19 @@
 
 FORMAT_TIME = lambda x: time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x))
 
+SERVER_SERVICE_CHECK = 'supervisord.can_connect'
+PROCESS_SERVICE_CHECK = 'supervisord.process.status'
+
 class SupervisordCheck(AgentCheck):
 
     def check(self, instance):
         server_name = instance.get('name')
 
+
         if not server_name or not server_name.strip():
-            raise Exception("Supervisord server name not specified in yaml configuration.")
+            raise Exception("Supervisor server name not specified in yaml configuration.")
 
+        server_service_check_tags = ['%s:%s' % (SERVER_TAG, server_name)]
         supe = self._connect(instance)
         count_by_status = defaultdict(int)
 
@@ -79,9 +84,9 @@ def check(self, instance):
                     ' has the right permissions.' % sock
 
             if e.errno not in [errno.EACCES, errno.ENOENT]: # permissions denied, no such file
-                self.service_check('supervisord.server.check', AgentCheck.CRITICAL,
-                                   tags=['%s:%s' % (SERVER_TAG, server_name)],
-                                   message='Supervisord server %s is down.' % server_name)
+                self.service_check(SERVER_SERVICE_CHECK, AgentCheck.CRITICAL,
+                                   tags=server_service_check_tags,
+                                   message='Supervisor server %s is down.' % server_name)
 
             raise Exception(msg)
         except xmlrpclib.ProtocolError, e:
@@ -92,6 +97,10 @@ def check(self, instance):
                 raise Exception('An error occurred while connecting to %s: '
                                 '%s %s ' % (server_name, e.errcode, e.errmsg))
 
+        # If we're here, we were able to connect to the server
+        self.service_check(SERVER_SERVICE_CHECK, AgentCheck.OK,
+            tags=server_service_check_tags)
+
         # Report service checks and uptime for each process
         for proc in processes:
             proc_name = proc['name']
@@ -102,7 +111,7 @@ def check(self, instance):
             status = DD_STATUS[proc['statename']]
             msg = self._build_message(proc)
             count_by_status[status] += 1
-            self.service_check('supervisord.process.check',
+            self.service_check(PROCESS_SERVICE_CHECK,
                                status, tags=tags, message=msg)
             # Report Uptime
             uptime = self._extract_uptime(proc)
diff --git a/tests/test_supervisord.py b/tests/test_supervisord.py
index 65cd789c70..b94a0f830b 100644
--- a/tests/test_supervisord.py
+++ b/tests/test_supervisord.py
@@ -4,7 +4,6 @@
 
 from mock import patch
 from nose.plugins.attrib import attr
-from nose.tools import ok_, eq_
 import unittest
 import xmlrpclib
 
@@ -38,17 +37,21 @@ class TestSupervisordCheck(unittest.TestCase):
         },
         'expected_service_checks': {
             'server1': [{
+                'status': AgentCheck.OK,
+                'tags': ['supervisord_server:server1'],
+                'check': 'supervisord.can_connect',
+            }, {
                 'status': AgentCheck.OK,
                 'tags': ['supervisord_server:server1', 'supervisord_process:mysql'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }, {
                 'status': AgentCheck.CRITICAL,
                 'tags': ['supervisord_server:server1', 'supervisord_process:java'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }, {
                 'status': AgentCheck.UNKNOWN,
                 'tags': ['supervisord_server:server1', 'supervisord_process:python'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }]
         }
     }, {
@@ -94,18 +97,26 @@ class TestSupervisordCheck(unittest.TestCase):
         },
         'expected_service_checks': {
             'server0': [{
+                'status': AgentCheck.OK,
+                'tags': ['supervisord_server:server0'],
+                'check': 'supervisord.can_connect',
+            }, {
                 'status': AgentCheck.CRITICAL,
                 'tags': ['supervisord_server:server0', 'supervisord_process:apache2'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }, {
                 'status': AgentCheck.CRITICAL,
                 'tags': ['supervisord_server:server0', 'supervisord_process:webapp'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }],
             'server1': [{
+                'status': AgentCheck.OK,
+                'tags': ['supervisord_server:server1'],
+                'check': 'supervisord.can_connect',
+            }, {
                 'status': AgentCheck.CRITICAL,
                 'tags': ['supervisord_server:server1', 'supervisord_process:ruby'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }]
         }
     }, {
@@ -167,9 +178,13 @@ class TestSupervisordCheck(unittest.TestCase):
         },
         'expected_service_checks': {
             'server0': [{
+                'status': AgentCheck.OK,
+                'tags': ['supervisord_server:server0'],
+                'check': 'supervisord.can_connect',
+            }, {
                 'status': AgentCheck.OK,
                 'tags': ['supervisord_server:server0', 'supervisord_process:mysql'],
-                'check': 'supervisord.process.check'
+                'check': 'supervisord.process.status'
             }]
         }
     }]
@@ -187,8 +202,8 @@ def test_check(self):
         """Integration test for supervisord check. Using a mocked supervisord."""
         for tc in self.TEST_CASES:
             check, instances = get_check('supervisord', tc['yaml'])
-            ok_(check is not None, msg=check)
-            eq_(tc['expected_instances'], instances)
+            self.assertTrue(check is not None, msg=check)
+            self.assertEquals(tc['expected_instances'], instances)
             for instance in instances:
                 name = instance['name']
 
@@ -197,9 +212,9 @@ def test_check(self):
                     check.check(instance)
                 except Exception, e:
                     if 'error_message' in tc:  # excepted error
-                        eq_(str(e), tc['error_message'])
+                        self.assertEquals(str(e), tc['error_message'])
                     else:
-                        ok_(False, msg=str(e))
+                        self.assertTrue(False, msg=str(e))
                 else:
                     # Assert that the check collected the right metrics
                     expected_metrics = tc['expected_metrics'][name]
@@ -216,12 +231,12 @@ def test_travis_supervisord(self):
 
         # Load yaml config
         config_str = open(os.environ['VOLATILE_DIR'] + '/supervisor/supervisord.yaml', 'r').read()
-        ok_(config_str is not None and len(config_str) > 0, msg=config_str)
+        self.assertTrue(config_str is not None and len(config_str) > 0, msg=config_str)
 
         # init the check and get the instances
         check, instances = get_check('supervisord', config_str)
-        ok_(check is not None, msg=check)
-        eq_(len(instances), 1)
+        self.assertTrue(check is not None, msg=check)
+        self.assertEquals(len(instances), 1)
 
         # Supervisord should run 3 programs for 30, 60 and 90 seconds
         # respectively. The tests below will ensure that the process count
@@ -232,7 +247,7 @@ def test_travis_supervisord(self):
                 check.check(instances[0])
             except Exception, e:
                 # Make sure that it ran successfully
-                ok_(False, msg=str(e))
+                self.assertTrue(False, msg=str(e))
             else:
                 up, down = 0, 0
                 for name, timestamp, value, meta in check.get_metrics():
@@ -241,8 +256,8 @@ def test_travis_supervisord(self):
                             up = value
                         elif 'status:down' in meta['tags']:
                             down = value
-                eq_(up, 3 - i)
-                eq_(down, i)
+                self.assertEquals(up, 3 - i)
+                self.assertEquals(down, i)
                 sleep(10)
 
     # Unit Tests ###########################################################
@@ -278,7 +293,7 @@ def test_build_message(self):
 Stop time: \nExit Status: 0"""
 
         check, _ = get_check('supervisord', self.TEST_CASES[0]['yaml'])
-        eq_(expected_message, check._build_message(process))
+        self.assertEquals(expected_message, check._build_message(process))
 
     # Helper Methods #######################################################
 
@@ -286,22 +301,20 @@ def test_build_message(self):
     def mock_server(url):
         return MockXmlRcpServer(url)
 
-    @staticmethod
-    def assert_metrics(expected, actual):
+    def assert_metrics(self, expected, actual):
         actual = [TestSupervisordCheck.norm_metric(metric) for metric in actual]
-        eq_(len(actual), len(expected), msg='Invalid # metrics reported.\n'
+        self.assertEquals(len(actual), len(expected), msg='Invalid # metrics reported.\n'
             'Expected: {0}. Found: {1}'.format(len(expected), len(actual)))
-        ok_(all([expected_metric in actual for expected_metric in expected]),
+        self.assertTrue(all([expected_metric in actual for expected_metric in expected]),
             msg='Reported metrics are incorrect.\nExpected: {0}.\n'
                 'Found: {1}'.format(expected, actual))
 
-    @staticmethod
-    def assert_service_checks(expected, actual):
+    def assert_service_checks(self, expected, actual):
         actual = [TestSupervisordCheck.norm_service_check(service_check)
                   for service_check in actual]
-        eq_(len(actual), len(expected), msg='Invalid # service checks reported.'
-            '\nExpected: {0}. Found: {1}.'.format(len(expected), len(actual)))
-        ok_(all([expected_service_check in actual
+        self.assertEquals(len(actual), len(expected), msg='Invalid # service checks reported.'
+            '\nExpected: {0}. Found: {1}.'.format(expected, actual))
+        self.assertTrue(all([expected_service_check in actual
                  for expected_service_check in expected]),
             msg='Reported service checks are incorrect.\nExpected:{0}\n'
                 'Found:{1}'.format(expected, actual))