Skip to content

Commit

Permalink
Fix bug when multiple instances were connecting to the same pg instance
Browse files Browse the repository at this point in the history
Fix #1211
  • Loading branch information
remh committed Feb 27, 2015
1 parent 7457ed2 commit 70374b4
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 8 deletions.
33 changes: 32 additions & 1 deletion checks.d/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ def __init__(self, name, init_config, agentConfig, instances=None):
self.versions = {}
self.instance_metrics = {}
self.bgw_metrics = {}
self.db_instance_metrics = []
self.db_bgw_metrics = []

def _get_version(self, key, db):
if key not in self.versions:
Expand Down Expand Up @@ -237,7 +239,22 @@ def _get_instance_metrics(self, key, db):
"""
# Extended 9.2+ metrics if needed
metrics = self.instance_metrics.get(key)

if metrics is None:

# Hack to make sure that if we have multiple instances that connect to
# the same host, port, we don't collect metrics twice
# as it will result in https://github.com/DataDog/dd-agent/issues/1211
sub_key = key[:2]
if sub_key in self.db_instance_metrics:
self.instance_metrics[key] = {}
self.log.debug("Not collecting instance metrics for key: {0} as"\
" they are already collected by another instance".format(key))
return {}

self.db_instance_metrics.append(sub_key)


if self._is_9_2_or_above(key, db):
self.instance_metrics[key] = dict(self.COMMON_METRICS, **self.NEWER_92_METRICS)
else:
Expand All @@ -252,7 +269,21 @@ def _get_bgw_metrics(self, key, db):
"""
# Extended 9.2+ metrics if needed
metrics = self.bgw_metrics.get(key)

if metrics is None:

# Hack to make sure that if we have multiple instances that connect to
# the same host, port, we don't collect metrics twice
# as it will result in https://github.com/DataDog/dd-agent/issues/1211
sub_key = key[:2]
if sub_key in self.db_bgw_metrics:
self.bgw_metrics[key] = {}
self.log.debug("Not collecting bgw metrics for key: {0} as"\
" they are already collected by another instance".format(key))
return {}

self.db_bgw_metrics.append(sub_key)

self.bgw_metrics[key] = dict(self.COMMON_BGW_METRICS)
if self._is_9_1_or_above(key, db):
self.bgw_metrics[key].update(self.NEWER_91_BGW_METRICS)
Expand Down Expand Up @@ -452,7 +483,7 @@ def check(self, instance):
if dbname is None:
dbname = 'postgres'

key = '%s:%s:%s' % (host, port, dbname)
key = (host, port, dbname)

# Clean up tags in case there was a None entry in the instance
# e.g. if the yaml contains tags: but no actual tags
Expand Down
20 changes: 13 additions & 7 deletions tests/test_postgresql.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,16 @@ def test_checks(self):
},
"query": "SELECT datname, %s FROM pg_stat_database WHERE datname = 'datadog_test' LIMIT(1)",
"relation": False,
}
]
}]
},
{
'host': host,
'port': port,
'username': 'datadog',
'password': 'datadog',
'dbname': 'postgres'
}
]
]
}
agentConfig = {
'version': '0.1',
Expand All @@ -51,7 +57,7 @@ def test_checks(self):
self.check.run()

# FIXME: Not great, should have a function like that available
key = '%s:%s:%s' % (host, port, dbname)
key = (host, port, dbname)
db = self.check.dbs[key]

metrics = self.check.get_metrics()
Expand Down Expand Up @@ -97,11 +103,11 @@ def test_checks(self):
service_checks_count = len(service_checks)
self.assertTrue(type(service_checks) == type([]))
self.assertTrue(service_checks_count > 0)
self.assertEquals(len([sc for sc in service_checks if sc['check'] == "postgres.can_connect"]), 2, service_checks)
self.assertEquals(len([sc for sc in service_checks if sc['check'] == "postgres.can_connect"]), 4, service_checks)
# Assert that all service checks have the proper tags: host, port and db
self.assertEquals(len([sc for sc in service_checks if "host:localhost" in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(len([sc for sc in service_checks if "port:%s" % config['instances'][0]['port'] in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(len([sc for sc in service_checks if "db:%s" % config['instances'][0]['dbname'] in sc['tags']]), service_checks_count, service_checks)
self.assertEquals(len([sc for sc in service_checks if "db:%s" % config['instances'][0]['dbname'] in sc['tags']]), service_checks_count/2, service_checks)

time.sleep(1)
self.check.run()
Expand All @@ -112,7 +118,7 @@ def test_checks(self):
self.assertEquals(len([m for m in metrics if 'table:persons' in str(m[3].get('tags', [])) ]), 11, metrics)

self.metrics = metrics
self.assertMetric("custom.numbackendss")
self.assertMetric("custom.numbackends")

if __name__ == '__main__':
unittest.main()

0 comments on commit 70374b4

Please sign in to comment.