Merge pull request #1395 from DataDog/remh/postgres_custom_metrics
Add pg_database_size and custom metrics to postgres integration
Remi Hakim committed Feb 25, 2015
2 parents 92470f0 + 7457ed2 commit 95c3ba1
Showing 3 changed files with 101 additions and 27 deletions.
57 changes: 47 additions & 10 deletions checks.d/postgres.py
@@ -1,6 +1,6 @@
"""PostgreSQL check
Collects database-wide metrics and optionally per-relation metrics.
Collects database-wide metrics and optionally per-relation and custom metrics.
"""
# project
from checks import AgentCheck, CheckException
@@ -10,10 +10,13 @@
from pg8000 import InterfaceError, ProgrammingError
import socket


MAX_CUSTOM_RESULTS = 100

class ShouldRestartException(Exception): pass

class PostgreSql(AgentCheck):
"""Collects per-database, and optionally per-relation metrics
"""Collects per-database, and optionally per-relation metrics, custom metrics
"""
SOURCE_TYPE_NAME = 'postgresql'
RATE = AgentCheck.rate
@@ -48,6 +51,7 @@ class PostgreSql(AgentCheck):
'tup_inserted' : ('postgresql.rows_inserted', RATE),
'tup_updated' : ('postgresql.rows_updated', RATE),
'tup_deleted' : ('postgresql.rows_deleted', RATE),
'pg_database_size(datname) as pg_database_size' : ('postgresql.database_size', GAUGE),
}

NEWER_92_METRICS = {
@@ -151,9 +155,9 @@ class PostgreSql(AgentCheck):
('relname', 'table'),
],
'metrics': {
'pg_table_size(C.oid)' : ('postgresql.table_size', GAUGE),
'pg_indexes_size(C.oid)' : ('postgresql.index_size', GAUGE),
'pg_total_relation_size(C.oid)': ('postgresql.total_size', GAUGE),
'pg_table_size(C.oid) as table_size' : ('postgresql.table_size', GAUGE),
'pg_indexes_size(C.oid) as index_size' : ('postgresql.index_size', GAUGE),
'pg_total_relation_size(C.oid) as total_size' : ('postgresql.total_size', GAUGE),
},
'relation': True,
'query': """
@@ -257,10 +261,11 @@ def _get_bgw_metrics(self, key, db):
metrics = self.bgw_metrics.get(key)
return metrics

def _collect_stats(self, key, db, instance_tags, relations):
def _collect_stats(self, key, db, instance_tags, relations, custom_metrics):
"""Query pg_stat_* for various metrics
If relations is not an empty list, gather per-relation metrics
on top of that.
If custom_metrics is not an empty list, gather custom metrics defined in postgres.yaml
"""

self.DB_METRICS['metrics'] = self._get_instance_metrics(key, db)
@@ -285,10 +290,11 @@ def _collect_stats(self, key, db, instance_tags, relations):
if self._is_9_1_or_above(key,db):
metric_scope.append(self.REPLICATION_METRICS)

full_metric_scope = list(metric_scope) + custom_metrics
try:
cursor = db.cursor()

for scope in metric_scope:
for scope in full_metric_scope:
if scope == self.REPLICATION_METRICS or not self._is_above(key, db, [9,0,0]):
log_func = self.log.debug
warning_func = self.log.debug
@@ -319,8 +325,15 @@ def _collect_stats(self, key, db, instance_tags, relations):
if not results:
continue

if scope in custom_metrics and len(results) > MAX_CUSTOM_RESULTS:
self.warning(
"Query: {0} returned more than {1} results ({2})Truncating").format(
query, MAX_CUSTOM_RESULTS, len(results))
results = results[:MAX_CUSTOM_RESULTS]

if scope == self.DB_METRICS:
self.gauge("postgresql.db.count", len(results), tags=[t for t in instance_tags if not t.startswith("db:")])
self.gauge("postgresql.db.count", len(results),
tags=[t for t in instance_tags if not t.startswith("db:")])

# parse & submit results
# A row should look like this
@@ -402,6 +415,25 @@ def get_connection(self, key, host, port, user, password, dbname, use_cached=Tru
self.dbs[key] = connection
return connection

def _process_customer_metrics(self,custom_metrics):
required_parameters = ("descriptors", "metrics", "query", "relation")

for m in custom_metrics:
for param in required_parameters:
if param not in m:
raise CheckException("Missing {0} parameter in custom metric"\
.format(param))

self.log.debug("Metric: {0}".format(m))

for k, v in m['metrics'].items():
if v[1].upper() not in ['RATE','GAUGE','MONOTONIC']:
raise CheckException("Collector method {0} is not known."\
"Known methods are RATE,GAUGE,MONOTONIC".format(
v[1].upper()))

m['metrics'][k][1] = getattr(PostgreSql, v[1].upper())
self.log.debug("Method: %s" % (str(v[1])))

def check(self, instance):
host = instance.get('host', '')
@@ -411,6 +443,8 @@ def check(self, instance):
tags = instance.get('tags', [])
dbname = instance.get('dbname', None)
relations = instance.get('relations', [])
custom_metrics = instance.get('custom_metrics') or []
self._process_customer_metrics(custom_metrics)

if relations and not dbname:
self.warning('"dbname" parameter must be set when using the "relations" parameter.')
@@ -430,6 +464,9 @@ def check(self, instance):
# preset tags to the database name
tags.extend(["db:%s" % dbname])

self.log.debug("Custom metrics: %s" % custom_metrics)

# preset tags to the database name
db = None

# Collect metrics
@@ -438,11 +475,11 @@ def check(self, instance):
db = self.get_connection(key, host, port, user, password, dbname)
version = self._get_version(key, db)
self.log.debug("Running check against version %s" % version)
self._collect_stats(key, db, tags, relations)
self._collect_stats(key, db, tags, relations, custom_metrics)
except ShouldRestartException:
self.log.info("Resetting the connection")
db = self.get_connection(key, host, port, user, password, dbname, use_cached=False)
self._collect_stats(key, db, tags, relations)
self._collect_stats(key, db, tags, relations, custom_metrics)

if db is not None:
service_check_tags = self._get_service_check_tags(host, port, dbname)
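A minimal sketch of the configuration shape that the new _process_customer_metrics method validates, mirroring the entry used in tests/test_postgresql.py further down. This is illustrative only and does not run inside the agent; how the %s placeholder in "query" is expanded is not shown in this hunk and is inferred from the YAML example that follows.

# Illustrative custom_metrics entry, written as the Python structure the check
# receives from postgres.yaml. The metric name "custom.numbackends" and the
# query are taken from tests/test_postgresql.py.
custom_metrics = [
    {
        # Each descriptor presumably maps a leading result column to a tag name.
        "descriptors": [("datname", "customdb")],
        # column expression -> [metric name, submission method]
        # The method must be RATE, GAUGE or MONOTONIC (case-insensitive);
        # _process_customer_metrics replaces the string with the matching
        # AgentCheck submission method.
        "metrics": {
            "numbackends": ["custom.numbackends", "Gauge"],
        },
        # %s is presumably replaced with the comma-joined column expressions
        # from "metrics" when the query is executed.
        "query": "SELECT datname, %s FROM pg_stat_database WHERE datname = 'datadog_test' LIMIT(1)",
        "relation": False,
    },
]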
46 changes: 33 additions & 13 deletions conf.d/postgres.yaml.example
@@ -3,17 +3,37 @@ init_config:
instances:
- host: localhost
port: 5432
# username: my_username
# password: my_password
# dbname: db_name
# tags:
# - optional_tag1
# - optional_tag2
# username: my_username
# password: my_password
# dbname: db_name
# tags:
# - optional_tag1
# - optional_tag2

# Track per-relation (table) metrics
# The list of relations/tables must be specified here.
# Each relation generates many metrics (10 + 10 per index)
#
# relations:
# - my_table
# - my_other_table
# Track per-relation (table) metrics
# The list of relations/tables must be specified here.
# Each relation generates many metrics (10 + 10 per index)
#
# relations:
# - my_table
# - my_other_table


# Custom metrics
# Below are some examples of commonly used metrics, implemented here as custom metrics.
# Uncomment them to use them as is, or use them as a template for your own custom metrics.
# The format for describing custom metrics is identical to the one used for the standard metrics in postgres.py.
# Be careful to follow the format exactly. If your custom metric does not work after an agent restart,
# look for errors in the output of the "/etc/init.d/datadog-agent info" command, as well as in the
# /var/log/datadog/collector.log file.
#
# custom_metrics:
# - # Londiste 3 replication lag
#   descriptors:
#     - [consumer_name, consumer_name]
#   metrics:
#     GREATEST(0, EXTRACT(EPOCH FROM lag)) as lag: [postgresql.londiste_lag, GAUGE]
#     GREATEST(0, EXTRACT(EPOCH FROM last_seen)) as last_seen: [postgresql.londiste_last_seen, GAUGE]
#     pending_events: [postgresql.londiste_pending_events, GAUGE]
#   query: SELECT consumer_name, %s from pgq.get_consumer_info() where consumer_name !~ 'watermark$';
#   relation: false
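To make the Londiste example concrete, here is a small sketch of how the final SQL is presumably assembled: the keys of the metrics mapping become the selected columns and are substituted for the %s placeholder in the query. This is an illustration based on the example above, not the agent's actual implementation; column order may vary.

# Illustration only: assembling the Londiste query from the example above.
metrics = {
    "GREATEST(0, EXTRACT(EPOCH FROM lag)) as lag": ["postgresql.londiste_lag", "GAUGE"],
    "GREATEST(0, EXTRACT(EPOCH FROM last_seen)) as last_seen": ["postgresql.londiste_last_seen", "GAUGE"],
    "pending_events": ["postgresql.londiste_pending_events", "GAUGE"],
}
query = ("SELECT consumer_name, %s from pgq.get_consumer_info() "
         "where consumer_name !~ 'watermark$';")
print(query % ", ".join(metrics.keys()))
# e.g. SELECT consumer_name, GREATEST(0, EXTRACT(EPOCH FROM lag)) as lag,
#      GREATEST(0, EXTRACT(EPOCH FROM last_seen)) as last_seen, pending_events
#      from pgq.get_consumer_info() where consumer_name !~ 'watermark$';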
25 changes: 21 additions & 4 deletions tests/test_postgresql.py
@@ -1,13 +1,15 @@
import unittest
from tests.common import load_check
from tests.common import load_check, AgentCheckTest

from nose.plugins.attrib import attr

import time
from pprint import pprint

@attr(requires='postgres')
class TestPostgres(unittest.TestCase):
class TestPostgres(AgentCheckTest):

CHECK_NAME = "postgres"

def test_checks(self):
host = 'localhost'
@@ -23,6 +25,18 @@ def test_checks(self):
'password': 'datadog',
'dbname': dbname,
'relations': ['persons'],
'custom_metrics': [
{
"descriptors": [
("datname", "customdb")
],
"metrics": {
"numbackends": ["custom.numbackends", "Gauge"],
},
"query": "SELECT datname, %s FROM pg_stat_database WHERE datname = 'datadog_test' LIMIT(1)",
"relation": False,
}
]
}
]
}
@@ -62,8 +76,8 @@ def test_checks(self):
self.check.run()
metrics = self.check.get_metrics()

exp_metrics = 37
exp_db_tagged_metrics = 24
exp_metrics = 39
exp_db_tagged_metrics = 26

if self.check._is_9_2_or_above(key, db):
self.assertTrue(len([m for m in metrics if m[0] == u'postgresql.bgwriter.sync_time']) >= 1, pprint(metrics))
@@ -97,5 +111,8 @@ def test_checks(self):
self.assertEquals(len([m for m in metrics if 'db:datadog_test' in str(m[3].get('tags', []))]), exp_db_tagged_metrics, metrics)
self.assertEquals(len([m for m in metrics if 'table:persons' in str(m[3].get('tags', [])) ]), 11, metrics)

self.metrics = metrics
self.assertMetric("custom.numbackendss")

if __name__ == '__main__':
unittest.main()
