From dae58736ab1c5777a988f00ae59fa5e08c330438 Mon Sep 17 00:00:00 2001 From: Kris Kumler Date: Tue, 25 Nov 2014 14:57:48 -0500 Subject: [PATCH 1/4] Added histogram min and adjusted unit tests. --- aggregator.py | 2 ++ tests/test_bucket_aggregator.py | 37 +++++++++++++++++++-------------- tests/test_dogstatsd.py | 25 +++++++++++----------- tests/test_go_expvar.py | 2 +- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/aggregator.py b/aggregator.py index 1d951f8828..303626d0df 100644 --- a/aggregator.py +++ b/aggregator.py @@ -236,11 +236,13 @@ def flush(self, ts, interval): self.samples.sort() length = len(self.samples) + min_ = self.samples[0] max_ = self.samples[-1] med = self.samples[int(round(length/2 - 1))] avg = sum(self.samples) / float(length) metric_aggrs = [ + ('min', min_, MetricTypes.GAUGE), ('max', max_, MetricTypes.GAUGE), ('median', med, MetricTypes.GAUGE), ('avg', avg, MetricTypes.GAUGE), diff --git a/tests/test_bucket_aggregator.py b/tests/test_bucket_aggregator.py index 3c70426610..73aaec0601 100644 --- a/tests/test_bucket_aggregator.py +++ b/tests/test_bucket_aggregator.py @@ -72,8 +72,8 @@ def test_histogram_normalization(self): self.sleep_for_interval_length(ag_interval) metrics = self.sort_metrics(stats.flush()) - _, _, h1count, _, _, \ - _, _, h2count, _, _ = metrics + _, _, h1count, _, _, _, \ + _, _, h2count, _, _, _ = metrics nt.assert_equal(h1count['points'][0][1], 0.5) nt.assert_equal(h2count['points'][0][1], 2) @@ -554,13 +554,14 @@ def test_histogram(self): self.sleep_for_interval_length(ag_interval) metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 5) - p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics) + nt.assert_equal(len(metrics), 6) + p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics) nt.assert_equal(p95['metric'], 'my.p.95percentile') self.assert_almost_equal(p95['points'][0][1], 95, 10) self.assert_almost_equal(pmax['points'][0][1], 99, 1) self.assert_almost_equal(pmed['points'][0][1], 50, 2) self.assert_almost_equal(pavg['points'][0][1], 50, 2) + self.assert_almost_equal(pmin['points'][0][1], 1, 1) nt.assert_equals(pcount['points'][0][1], 4000) # 100 * 20 * 2 nt.assert_equals(p95['host'], 'myhost') @@ -578,10 +579,10 @@ def test_sampled_histogram(self): # Assert we scale up properly. 
self.sleep_for_interval_length() metrics = self.sort_metrics(stats.flush()) - p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics) + p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics) nt.assert_equal(pcount['points'][0][1], 2) - for p in [p95, pavg, pmed, pmax]: + for p in [p95, pavg, pmed, pmax, pmin]: nt.assert_equal(p['points'][0][1], 5) def test_histogram_buckets(self): @@ -611,13 +612,14 @@ def test_histogram_buckets(self): self.sleep_for_interval_length(ag_interval) metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 10) - p95, p95_b, pavg, pavg_b, pcount, pcount_b, pmax, pmax_b, pmed, pmed_b = self.sort_metrics(metrics) + nt.assert_equal(len(metrics), 12) + p95, p95_b, pavg, pavg_b, pcount, pcount_b, pmax, pmax_b, pmed, pmed_b, pmin, pmin_b = self.sort_metrics(metrics) nt.assert_equal(p95['metric'], 'my.p.95percentile') self.assert_almost_equal(p95['points'][0][1], 95, 10) self.assert_almost_equal(pmax['points'][0][1], 99, 1) self.assert_almost_equal(pmed['points'][0][1], 50, 2) self.assert_almost_equal(pavg['points'][0][1], 50, 2) + self.assert_almost_equal(pmin['points'][0][1], 1, 1) nt.assert_equals(pcount['points'][0][1], 4000) # 100 * 20 * 2 nt.assert_equal(p95_b['metric'], 'my.p.95percentile') @@ -625,6 +627,7 @@ def test_histogram_buckets(self): self.assert_almost_equal(pmax_b['points'][0][1], 49, 1) self.assert_almost_equal(pmed_b['points'][0][1], 25, 2) self.assert_almost_equal(pavg_b['points'][0][1], 25, 2) + self.assert_almost_equal(pmin_b['points'][0][1], 1, 1) nt.assert_equals(pcount_b['points'][0][1], 2000) # 100 * 20 * 2 nt.assert_equals(p95['host'], 'myhost') @@ -663,25 +666,27 @@ def test_histogram_flush_during_bucket(self): metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 5) - p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics) + nt.assert_equal(len(metrics), 6) + p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics) nt.assert_equal(p95['metric'], 'my.p.95percentile') self.assert_almost_equal(p95['points'][0][1], 95, 10) self.assert_almost_equal(pmax['points'][0][1], 99, 1) self.assert_almost_equal(pmed['points'][0][1], 50, 2) self.assert_almost_equal(pavg['points'][0][1], 50, 2) + self.assert_almost_equal(pmin['points'][0][1], 1, 1) nt.assert_equal(pcount['points'][0][1], 4000) # 100 * 20 * 2 nt.assert_equals(p95['host'], 'myhost') self.sleep_for_interval_length() metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 5) - p95_b, pavg_b, pcount_b, pmax_b, pmed_b = self.sort_metrics(metrics) + nt.assert_equal(len(metrics), 6) + p95_b, pavg_b, pcount_b, pmax_b, pmed_b, pmin_b = self.sort_metrics(metrics) nt.assert_equal(p95_b['metric'], 'my.p.95percentile') self.assert_almost_equal(p95_b['points'][0][1], 47, 10) self.assert_almost_equal(pmax_b['points'][0][1], 49, 1) self.assert_almost_equal(pmed_b['points'][0][1], 25, 2) self.assert_almost_equal(pavg_b['points'][0][1], 25, 2) + self.assert_almost_equal(pmin_b['points'][0][1], 1, 1) nt.assert_equals(pcount_b['points'][0][1], 2000) # 100 * 20 * 2 # Ensure that histograms are reset. 
@@ -747,7 +752,7 @@ def test_metrics_expiry(self): # Ensure points keep submitting self.sleep_for_interval_length() metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 8) + nt.assert_equal(len(metrics), 9) nt.assert_equal(metrics[0]['metric'], 'test.counter') nt.assert_equal(metrics[0]['points'][0][1], 123) nt.assert_equal(metrics[0]['points'][0][0], submit_bucket_timestamp) @@ -802,7 +807,7 @@ def test_metrics_expiry(self): stats.submit_packets('test.histogram:11|h') self.sleep_for_interval_length() metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 8) + nt.assert_equal(len(metrics), 9) nt.assert_equal(metrics[0]['metric'], 'test.counter') nt.assert_equal(metrics[0]['points'][0][1], 123) @@ -967,9 +972,9 @@ def test_recent_point_threshold(self): # The bucket timestamp is the beginning of the bucket that ended before we flushed bucket_timestamp = flush_timestamp - (flush_timestamp % ag_interval) - ag_interval metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 10) + nt.assert_equal(len(metrics), 11) - first, first_b, second, second_b, third, h1, h2, h3, h4, h5 = metrics + first, first_b, second, second_b, third, h1, h2, h3, h4, h5, h6 = metrics nt.assert_equals(first['metric'], 'my.1.gauge') nt.assert_equals(first['points'][0][1], 1) nt.assert_equals(first['host'], 'myhost') diff --git a/tests/test_dogstatsd.py b/tests/test_dogstatsd.py index 018366a2bb..de132bcfe1 100644 --- a/tests/test_dogstatsd.py +++ b/tests/test_dogstatsd.py @@ -80,8 +80,8 @@ def test_histogram_normalization(self): stats.submit_packets('h2:1|h') metrics = self.sort_metrics(stats.flush()) - _, _, h1count, _, _, \ - _, _, h2count, _, _ = metrics + _, _, h1count, _, _, _, \ + _, _, h2count, _, _, _ = metrics nt.assert_equal(h1count['points'][0][1], 0.5) nt.assert_equal(h2count['points'][0][1], 2) @@ -338,13 +338,14 @@ def test_histogram(self): metrics = self.sort_metrics(stats.flush()) - nt.assert_equal(len(metrics), 5) - p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics) + nt.assert_equal(len(metrics), 6) + p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics) nt.assert_equal(p95['metric'], 'my.p.95percentile') self.assert_almost_equal(p95['points'][0][1], 95, 10) self.assert_almost_equal(pmax['points'][0][1], 99, 1) self.assert_almost_equal(pmed['points'][0][1], 50, 2) self.assert_almost_equal(pavg['points'][0][1], 50, 2) + self.assert_almost_equal(pmin['points'][0][1], 1, 1) self.assert_almost_equal(pcount['points'][0][1], 4000, 0) # 100 * 20 * 2 nt.assert_equals(p95['host'], 'myhost') @@ -361,10 +362,10 @@ def test_sampled_histogram(self): # Assert we scale up properly. 
         metrics = self.sort_metrics(stats.flush())
-        p95, pavg, pcount, pmax, pmed = self.sort_metrics(metrics)
+        p95, pavg, pcount, pmax, pmed, pmin = self.sort_metrics(metrics)
         nt.assert_equal(pcount['points'][0][1], 2)
 
-        for p in [p95, pavg, pmed, pmax]:
+        for p in [p95, pavg, pmed, pmax, pmin]:
             nt.assert_equal(p['points'][0][1], 5)
 
     def test_batch_submission(self):
@@ -399,7 +400,7 @@ def test_monokey_batching_notags(self):
         metrics = stats.flush()
         metrics_ref = stats_ref.flush()
 
-        self.assertTrue(len(metrics) == len(metrics_ref) == 5, (metrics, metrics_ref))
+        self.assertTrue(len(metrics) == len(metrics_ref) == 6, (metrics, metrics_ref))
         for i in range(len(metrics)):
             nt.assert_equal(metrics[i]['points'][0][1], metrics_ref[i]['points'][0][1])
 
@@ -443,7 +444,7 @@ def test_monokey_batching_withtags_with_sampling(self):
         metrics = self.sort_metrics(stats.flush())
         metrics_ref = self.sort_metrics(stats_ref.flush())
 
-        self.assertTrue(len(metrics) == len(metrics_ref) == 8, (metrics, metrics_ref))
+        self.assertTrue(len(metrics) == len(metrics_ref) == 9, (metrics, metrics_ref))
         for i in range(len(metrics)):
             nt.assert_equal(metrics[i]['points'][0][1], metrics_ref[i]['points'][0][1])
             nt.assert_equal(metrics[i]['tags'], metrics_ref[i]['tags'])
@@ -486,7 +487,7 @@ def test_metrics_expiry(self):
         # Ensure points keep submitting
         time.sleep(ag_interval)
         metrics = self.sort_metrics(stats.flush())
-        nt.assert_equal(len(metrics), 8)
+        nt.assert_equal(len(metrics), 9)
         nt.assert_equal(metrics[0]['metric'], 'test.counter')
         nt.assert_equal(metrics[0]['points'][0][1], 123)
         time.sleep(ag_interval)
@@ -516,7 +517,7 @@ def test_metrics_expiry(self):
         stats.submit_packets('test.histogram:11|h')
 
         metrics = self.sort_metrics(stats.flush())
-        nt.assert_equal(len(metrics), 8)
+        nt.assert_equal(len(metrics), 9)
         nt.assert_equal(metrics[0]['metric'], 'test.counter')
         nt.assert_equal(metrics[0]['points'][0][1], 123)
 
@@ -661,9 +662,9 @@ def test_recent_point_threshold(self):
         flush_timestamp = time.time()
 
         metrics = self.sort_metrics(stats.flush())
-        nt.assert_equal(len(metrics), 8)
+        nt.assert_equal(len(metrics), 9)
 
-        first, second, third, h1, h2, h3, h4, h5 = metrics
+        first, second, third, h1, h2, h3, h4, h5, h6 = metrics
         nt.assert_equals(first['metric'], 'my.1.gauge')
         nt.assert_equals(first['points'][0][1], 1)
         nt.assert_equals(first['host'], 'myhost')
diff --git a/tests/test_go_expvar.py b/tests/test_go_expvar.py
index fc22151613..15bd6335a3 100644
--- a/tests/test_go_expvar.py
+++ b/tests/test_go_expvar.py
@@ -55,7 +55,7 @@ def testGoExpVar(self):
         metrics = self.check.get_metrics()
 
         # The rate is not here so only 2
-        self.assertEqual(len(metrics), 15, metrics)
+        self.assertEqual(len(metrics), 16, metrics)
 
         self._assert_metric_number(metrics, 'go_expvar.gauge1', 1)
         self._assert_metric_number(metrics, 'go_expvar.memstats.by_size.1.mallocs', 1)

From acfc284e4aaa2532ab9ac35705eb0d8c6ae64a9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Cavaill=C3=A9?=
Date: Fri, 16 Jan 2015 12:15:06 +0100
Subject: [PATCH 2/4] Skip min_collection test on Travis for now

---
 tests/test_common.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/tests/test_common.py b/tests/test_common.py
index b12ace3b3b..282e0d27cf 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -1,15 +1,25 @@
+import logging
+import os
 import time
 import unittest
+
 from nose.plugins.attrib import attr
-import logging
-logger = logging.getLogger()
-from checks import (Check, AgentCheck,
-    CheckException, UnknownValue, CheckException, Infinity)
-from checks.collector import Collector
+from nose.plugins.skip import SkipTest
+
 from aggregator import MetricsAggregator
-from common import load_check
+from checks import (
+    AgentCheck,
+    Check,
+    CheckException,
+    Infinity,
+    UnknownValue,
+)
+from checks.collector import Collector
+from tests.common import load_check
 from util import get_hostname
+logger = logging.getLogger()
+
 
 class TestCore(unittest.TestCase):
     "Tests to validate the core check logic"
@@ -174,7 +184,8 @@ def test_collector(self):
             assert tag in all_tags, all_tags
 
     def test_min_collection_interval(self):
-
+        if os.environ.get('TRAVIS', False):
+            raise SkipTest('ntp server times out too often on Travis')
         config = {'instances': [{'host': '0.amazon.pool.ntp.org', 'timeout': 1}], 'init_config': {}}
 
         agentConfig = {

From 134856397dc5f35974250981f71b342f93e76ff3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Cavaill=C3=A9?=
Date: Fri, 16 Jan 2015 02:13:42 +0100
Subject: [PATCH 3/4] Minor improvements to run tests locally

---
 Rakefile                |   5 +-
 checks.d/zk.py          |   2 +-
 ci/default.rb           |   3 +-
 tests/test_config.py    |   3 +-
 tests/test_ec2.py       |   5 +-
 tests/test_system.py    |  11 +++--
 tests/test_zookeeper.py | 105 ++++++++++++++++++++++------------------
 7 files changed, 76 insertions(+), 58 deletions(-)

diff --git a/Rakefile b/Rakefile
index 29a6d55f74..3ce0d608ed 100755
--- a/Rakefile
+++ b/Rakefile
@@ -34,7 +34,8 @@ unless ENV['IS_TRAVIS']
   ENV['INTEGRATIONS_DIR'] = File.join(rakefile_dir, 'embedded')
   ENV['PIP_CACHE'] = File.join(rakefile_dir, '.pip-cache')
   ENV['VOLATILE_DIR'] = '/tmp/dd-agent-testing'
-  ENV['CONCURRENCY'] = ENV['CONCURRENCY'] || '2' 
+  ENV['CONCURRENCY'] = ENV['CONCURRENCY'] || '2'
+  ENV['NOSE_FILTER'] = 'not windows'
 end
 
 desc 'Setup a development environment for the Agent'
@@ -74,7 +75,7 @@ end
 
 desc "Lint the code through pylint"
 task "lint" do
-  sh %{find . -name '*.py' -type f -not -path '*venv*' | xargs --max-procs=$CONCURRENCY -n 1 pylint --rcfile=./.pylintrc}
+  sh %{find . -name '*.py' -type f -not -path '*venv*' -not -path '*embedded*' -exec pylint --rcfile=./.pylintrc {} \\;}
 end
 
 desc "Run the Agent locally"
diff --git a/checks.d/zk.py b/checks.d/zk.py
index c2b6fe73aa..bbb7bde966 100644
--- a/checks.d/zk.py
+++ b/checks.d/zk.py
@@ -37,7 +37,7 @@ class ZKConnectionFailure(Exception):
     pass
 
 
-class Zookeeper(AgentCheck):
+class ZookeeperCheck(AgentCheck):
     version_pattern = re.compile(r'Zookeeper version: ([^.]+)\.([^.]+)\.([^-]+)', flags=re.I)
 
     SOURCE_TYPE_NAME = 'zookeeper'
diff --git a/ci/default.rb b/ci/default.rb
index bd4718c2d5..b9204ed94b 100644
--- a/ci/default.rb
+++ b/ci/default.rb
@@ -9,8 +9,7 @@
 task :before_script => ['ci:common:before_script']
 
 task :script => ['ci:common:script'] do
-  sh %(find . -name '*.py'\
-     | xargs --max-procs=0 -n 1 pylint --rcfile=./.pylintrc)
+  sh %(find . 
-name '*.py' -not -path '*venv*' -not -path '*embedded*' -exec pylint --rcfile=./.pylintrc {} \\;) Rake::Task['ci:common:run_tests'].invoke('default') end diff --git a/tests/test_config.py b/tests/test_config.py index 298ee30b0d..76a346ae29 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,5 +1,6 @@ ## -*- coding: latin-1 -*- import unittest +import os import os.path import tempfile @@ -42,7 +43,7 @@ def testBadPidFile(self): p = PidFile('test', pid_dir) path = p.get_path() - self.assertEquals(path, "/tmp/test.pid") + self.assertEquals(path, os.path.join(tempfile.gettempdir(), 'test.pid')) pid = "666" pid_f = open(path, 'w') diff --git a/tests/test_ec2.py b/tests/test_ec2.py index 01ce92f145..4692b7863b 100644 --- a/tests/test_ec2.py +++ b/tests/test_ec2.py @@ -8,8 +8,8 @@ class TestEC2(unittest.TestCase): def test_metadata(self): - # Skip this step on travis - if os.environ.get('TRAVIS', False): return + # Reset metadata just to be sure + EC2.metadata = {} # Test gathering metadata from ec2 start = time.time() d = EC2.get_metadata({'collect_instance_metadata': True}) @@ -19,7 +19,6 @@ def test_metadata(self): assert len(d) == 0 or len(d) >= 7, d if "instance-id" in d: assert d["instance-id"].startswith("i-"), d - assert d["hostname"].startswith("i-") or d["hostname"].startswith("domU-"), d assert end - start <= 1.1, "It took %s seconds to get ec2 metadata" % (end-start) if __name__ == "__main__": diff --git a/tests/test_system.py b/tests/test_system.py index 9f82e5cd74..879dc05766 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -245,9 +245,14 @@ def testNetwork(self): assert 'system.net.bytes_rcvd' in metric_names assert 'system.net.bytes_sent' in metric_names - assert 'system.net.tcp.retrans_segs' in metric_names - assert 'system.net.tcp.in_segs' in metric_names - assert 'system.net.tcp.out_segs' in metric_names + if Platform.is_linux(): + assert 'system.net.tcp.retrans_segs' in metric_names + assert 'system.net.tcp.in_segs' in metric_names + assert 'system.net.tcp.out_segs' in metric_names + elif Platform.is_bsd(): + assert 'system.net.tcp.retrans_packs' in metric_names + assert 'system.net.tcp.sent_packs' in metric_names + assert 'system.net.tcp.rcv_packs' in metric_names if __name__ == "__main__": diff --git a/tests/test_zookeeper.py b/tests/test_zookeeper.py index 131b930178..0cac33ba02 100644 --- a/tests/test_zookeeper.py +++ b/tests/test_zookeeper.py @@ -1,8 +1,14 @@ +# stdlib import os -import unittest from StringIO import StringIO -from tests.common import get_check +import unittest + +# 3p +from mock import patch + +# project from checks import AgentCheck +from tests.common import get_check CONFIG = """ init_config: @@ -21,15 +27,34 @@ - host: 127.0.0.1 port: 2182 tags: [] + expected_mode: leader """ -class TestZookeeper(unittest.TestCase): - def is_travis(self): - return "TRAVIS" in os.environ +def send_command_lt_v344(cmd, *args): + if cmd == 'stat': + return StringIO("""Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT + Clients: + /10.42.114.160:32634[1](queued=0,recved=12,sent=0) + /10.37.137.74:21873[1](queued=0,recved=53613,sent=0) + /10.37.137.74:21876[1](queued=0,recved=57436,sent=0) + /10.115.77.32:32990[1](queued=0,recved=16,sent=0) + /10.37.137.74:21891[1](queued=0,recved=55011,sent=0) + /10.37.137.74:21797[1](queued=0,recved=19431,sent=0) - def test_zk_stat_parsing_lt_v344(self): - zk, instances = get_check('zk', CONFIG) - stat_response = """Zookeeper version: 3.2.2--1, built on 03/16/2010 07:31 GMT +Latency min/avg/max: 
-10/0/20007 +Received: 101032173 +Sent: 0 +Outstanding: 0 +Zxid: 0x1034799c7 +Mode: leader +Node count: 487 +""") + else: + return StringIO() + +def send_command_gte_v344(cmd, *args): + if cmd == 'stat': + return StringIO("""Zookeeper version: 3.4.5--1, built on 03/16/2010 07:31 GMT Clients: /10.42.114.160:32634[1](queued=0,recved=12,sent=0) /10.37.137.74:21873[1](queued=0,recved=53613,sent=0) @@ -41,11 +66,18 @@ def test_zk_stat_parsing_lt_v344(self): Latency min/avg/max: -10/0/20007 Received: 101032173 Sent: 0 +Connections: 1 Outstanding: 0 Zxid: 0x1034799c7 Mode: leader Node count: 487 -""" +""") + else: + return StringIO() + +class TestZookeeper(unittest.TestCase): + def test_zk_stat_parsing_lt_v344(self): + zk, instances = get_check('zk', CONFIG) expected = [ ('zookeeper.latency.min', -10), ('zookeeper.latency.avg', 0), @@ -59,43 +91,22 @@ def test_zk_stat_parsing_lt_v344(self): ('zookeeper.nodes', 487L), ] - buf = StringIO(stat_response) - metrics, tags, mode = zk.parse_stat(buf) - self.assertEquals(tags, ['mode:leader']) - self.assertEquals(metrics, expected) + with patch.object(zk, '_send_command', send_command_lt_v344): + zk.check(instances[0]) - zk.check(instances[0]) service_checks = zk.get_service_checks() - expected = 1 if self.is_travis() else 2 - self.assertEquals(len(service_checks), expected) self.assertEquals(service_checks[0]['check'], 'zookeeper.ruok') - # Don't check status of ruok because it can vary if ZK is running. + self.assertEquals(service_checks[1]['check'], 'zookeeper.mode') + self.assertEquals(service_checks[1]['status'], AgentCheck.CRITICAL) - if not self.is_travis(): - self.assertEquals(service_checks[1]['check'], 'zookeeper.mode') - self.assertEquals(service_checks[1]['status'], AgentCheck.CRITICAL) + metrics = zk.get_metrics() + self.assertEquals(sorted([(name, val) for name, _, val, _ in metrics]), sorted(expected)) + self.assertEquals(len(service_checks), 2) + self.assertEquals(metrics[0][3]['tags'], ['mode:leader']) def test_zk_stat_parsing_gte_v344(self): zk, instances = get_check('zk', CONFIG2) - stat_response = """Zookeeper version: 3.4.5--1, built on 03/16/2010 07:31 GMT -Clients: - /10.42.114.160:32634[1](queued=0,recved=12,sent=0) - /10.37.137.74:21873[1](queued=0,recved=53613,sent=0) - /10.37.137.74:21876[1](queued=0,recved=57436,sent=0) - /10.115.77.32:32990[1](queued=0,recved=16,sent=0) - /10.37.137.74:21891[1](queued=0,recved=55011,sent=0) - /10.37.137.74:21797[1](queued=0,recved=19431,sent=0) - -Latency min/avg/max: -10/0/20007 -Received: 101032173 -Sent: 0 -Connections: 1 -Outstanding: 0 -Zxid: 0x1034799c7 -Mode: leader -Node count: 487 -""" expected = [ ('zookeeper.latency.min', -10), ('zookeeper.latency.avg', 0), @@ -109,14 +120,16 @@ def test_zk_stat_parsing_gte_v344(self): ('zookeeper.nodes', 487L), ] - buf = StringIO(stat_response) - metrics, tags, mode = zk.parse_stat(buf) - - self.assertEquals(tags, ['mode:leader']) - self.assertEquals(metrics, expected) - zk.check(instances[0]) + with patch.object(zk, '_send_command', send_command_gte_v344): + zk.check(instances[0]) service_checks = zk.get_service_checks() - self.assertEquals(len(service_checks), 1) + self.assertEquals(len(service_checks), 2) self.assertEquals(service_checks[0]['check'], 'zookeeper.ruok') - self.assertEquals(service_checks[0]['status'], AgentCheck.CRITICAL) + self.assertEquals(service_checks[0]['status'], AgentCheck.WARNING) + self.assertEquals(service_checks[1]['check'], 'zookeeper.mode') + self.assertEquals(service_checks[1]['status'], AgentCheck.OK) + + metrics = 
zk.get_metrics() + self.assertEquals(metrics[0][3]['tags'], ['mode:leader']) + self.assertEquals(sorted([(name, val) for name, _, val, _ in metrics]), sorted(expected)) From 9b8d250a9c590875cccc8e0316df43c41569b910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Cavaill=C3=A9?= Date: Fri, 16 Jan 2015 02:11:06 +0100 Subject: [PATCH 4/4] Make histogram aggregates/%iles configurable --- aggregator.py | 85 ++++++++++++++----- checks/__init__.py | 8 +- config.py | 56 +++++++++++++ datadog.conf.example | 3 + dogstatsd.py | 8 +- tests/test_bucket_aggregator.py | 35 ++++++-- tests/test_dogstatsd.py | 49 ++++++++--- tests/test_go_expvar.py | 2 +- tests/test_histogram.py | 143 ++++++++++++++++++++++++++++++++ 9 files changed, 346 insertions(+), 43 deletions(-) create mode 100644 tests/test_histogram.py diff --git a/aggregator.py b/aggregator.py index 303626d0df..9b3ec450c9 100644 --- a/aggregator.py +++ b/aggregator.py @@ -1,6 +1,8 @@ import logging from time import time + from checks.metric_types import MetricTypes +from config import get_histogram_aggregates, get_histogram_percentiles log = logging.getLogger(__name__) @@ -38,7 +40,7 @@ def flush(self, timestamp, interval): class Gauge(Metric): """ A metric that tracks a value at particular points in time. """ - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.value = None @@ -100,7 +102,7 @@ def flush(self, timestamp, interval): class Count(Metric): """ A metric that tracks a count. """ - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.value = None @@ -132,7 +134,7 @@ def flush(self, timestamp, interval): class MonotonicCount(Metric): - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.tags = tags @@ -180,7 +182,7 @@ def flush(self, timestamp, interval): class Counter(Metric): """ A metric that tracks a counter value. """ - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.value = 0 @@ -209,16 +211,23 @@ def flush(self, timestamp, interval): finally: self.value = 0 +DEFAULT_HISTOGRAM_AGGREGATES = ['max', 'median', 'avg', 'count'] +DEFAULT_HISTOGRAM_PERCENTILES = [0.95] class Histogram(Metric): """ A metric to track the distribution of a set of values. 
""" - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.count = 0 self.samples = [] - self.percentiles = [0.95] + self.aggregates = extra_config['aggregates'] if\ + extra_config is not None and extra_config.get('aggregates') is not None\ + else DEFAULT_HISTOGRAM_AGGREGATES + self.percentiles = extra_config['percentiles'] if\ + extra_config is not None and extra_config.get('percentiles') is not None\ + else DEFAULT_HISTOGRAM_PERCENTILES self.tags = tags self.hostname = hostname self.device_name = device_name @@ -241,12 +250,18 @@ def flush(self, ts, interval): med = self.samples[int(round(length/2 - 1))] avg = sum(self.samples) / float(length) - metric_aggrs = [ + aggregators = [ ('min', min_, MetricTypes.GAUGE), ('max', max_, MetricTypes.GAUGE), ('median', med, MetricTypes.GAUGE), ('avg', avg, MetricTypes.GAUGE), - ('count', self.count/interval, MetricTypes.RATE) + ('count', self.count/interval, MetricTypes.RATE), + ] + + metric_aggrs = [ + (agg_name, agg_func, m_type) + for agg_name, agg_func, m_type in aggregators + if agg_name in self.aggregates ] metrics = [self.formatter( @@ -284,7 +299,7 @@ def flush(self, ts, interval): class Set(Metric): """ A metric to track the number of unique elements in a set. """ - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.tags = tags @@ -318,7 +333,7 @@ def flush(self, timestamp, interval): class Rate(Metric): """ Track the rate of metrics over each flush interval """ - def __init__(self, formatter, name, tags, hostname, device_name): + def __init__(self, formatter, name, tags, hostname, device_name, extra_config=None): self.formatter = formatter self.name = name self.tags = tags @@ -374,7 +389,9 @@ class Aggregator(object): # Types of metrics that allow strings ALLOW_STRINGS = ['s', ] - def __init__(self, hostname, interval=1.0, expiry_seconds=300, formatter=None, recent_point_threshold=None): + def __init__(self, hostname, interval=1.0, expiry_seconds=300, + formatter=None, recent_point_threshold=None, + histogram_aggregates=None, histogram_percentiles=None): self.events = [] self.total_count = 0 self.count = 0 @@ -388,6 +405,14 @@ def __init__(self, hostname, interval=1.0, expiry_seconds=300, formatter=None, r self.recent_point_threshold = int(recent_point_threshold) self.num_discarded_old_points = 0 + # Additional config passed when instantiating metric configs + self.metric_config = { + Histogram: { + 'aggregates': histogram_aggregates, + 'percentiles': histogram_percentiles + } + } + def packets_per_second(self, interval): if interval == 0: return 0 @@ -593,8 +618,18 @@ class MetricsBucketAggregator(Aggregator): A metric aggregator class. 
""" - def __init__(self, hostname, interval=1.0, expiry_seconds=300, formatter=None, recent_point_threshold=None): - super(MetricsBucketAggregator, self).__init__(hostname, interval, expiry_seconds, formatter, recent_point_threshold) + def __init__(self, hostname, interval=1.0, expiry_seconds=300, + formatter=None, recent_point_threshold=None, + histogram_aggregates=None, histogram_percentiles=None): + super(MetricsBucketAggregator, self).__init__( + hostname, + interval, + expiry_seconds, + formatter, + recent_point_threshold, + histogram_aggregates, + histogram_percentiles + ) self.metric_by_bucket = {} self.last_sample_time_by_context = {} self.current_bucket = None @@ -647,7 +682,7 @@ def submit_metric(self, name, value, mtype, tags=None, hostname=None, if context not in metric_by_context: metric_class = self.metric_type_to_class[mtype] metric_by_context[context] = metric_class(self.formatter, name, tags, - hostname, device_name) + hostname, device_name, self.metric_config.get(metric_class)) metric_by_context[context].sample(value, sample_rate, timestamp) @@ -721,8 +756,18 @@ class MetricsAggregator(Aggregator): A metric aggregator class. """ - def __init__(self, hostname, interval=1.0, expiry_seconds=300, formatter=None, recent_point_threshold=None): - super(MetricsAggregator, self).__init__(hostname, interval, expiry_seconds, formatter, recent_point_threshold) + def __init__(self, hostname, interval=1.0, expiry_seconds=300, + formatter=None, recent_point_threshold=None, + histogram_aggregates=None, histogram_percentiles=None): + super(MetricsAggregator, self).__init__( + hostname, + interval, + expiry_seconds, + formatter, + recent_point_threshold, + histogram_aggregates, + histogram_percentiles + ) self.metrics = {} self.metric_type_to_class = { 'g': Gauge, @@ -749,7 +794,7 @@ def submit_metric(self, name, value, mtype, tags=None, hostname=None, if context not in self.metrics: metric_class = self.metric_type_to_class[mtype] self.metrics[context] = metric_class(self.formatter, name, tags, - hostname, device_name) + hostname, device_name, self.metric_config.get(metric_class)) cur_time = time() if timestamp is not None and cur_time - int(timestamp) > self.recent_point_threshold: log.debug("Discarding %s - ts = %s , current ts = %s " % (name, timestamp, cur_time)) @@ -812,18 +857,18 @@ def get_formatter(config): formatter = api_formatter if config['statsd_metric_namespace']: - def metric_namespace_formatter_wrapper(metric, value, timestamp, tags, + def metric_namespace_formatter_wrapper(metric, value, timestamp, tags, hostname=None, device_name=None, metric_type=None, interval=None): metric_prefix = config['statsd_metric_namespace'] if metric_prefix[-1] != '.': metric_prefix += '.' 
-            return api_formatter(metric_prefix + metric, value, timestamp, tags, hostname, 
+            return api_formatter(metric_prefix + metric, value, timestamp, tags, hostname,
                 device_name, metric_type, interval)
         formatter = metric_namespace_formatter_wrapper
     return formatter
 
-    
+
 def api_formatter(metric, value, timestamp, tags, hostname=None, device_name=None,
         metric_type=None, interval=None):
diff --git a/checks/__init__.py b/checks/__init__.py
index 6c659fdd9e..262089fa9a 100644
--- a/checks/__init__.py
+++ b/checks/__init__.py
@@ -289,9 +289,13 @@ def __init__(self, name, init_config, agentConfig, instances=None):
         self.hostname = agentConfig.get('checksd_hostname') or get_hostname(agentConfig)
         self.log = logging.getLogger('%s.%s' % (__name__, name))
 
-        self.aggregator = MetricsAggregator(self.hostname,
+        self.aggregator = MetricsAggregator(
+            self.hostname,
             formatter=agent_formatter,
-            recent_point_threshold=agentConfig.get('recent_point_threshold', None))
+            recent_point_threshold=agentConfig.get('recent_point_threshold', None),
+            histogram_aggregates=agentConfig.get('histogram_aggregates'),
+            histogram_percentiles=agentConfig.get('histogram_percentiles')
+        )
 
         self.events = []
         self.service_checks = []
diff --git a/config.py b/config.py
index 32416b95b1..b3741a36fe 100644
--- a/config.py
+++ b/config.py
@@ -220,6 +220,55 @@ def get_default_bind_host():
         return '127.0.0.1'
     return 'localhost'
 
+
+def get_histogram_aggregates(configstr=None):
+    if configstr is None:
+        return None
+
+    try:
+        vals = configstr.split(',')
+        valid_values = ['min', 'max', 'median', 'avg', 'count']
+        result = []
+
+        for val in vals:
+            val = val.strip()
+            if val not in valid_values:
+                log.warning("Ignored invalid histogram aggregate {0}".format(val))
+                continue
+
+            result.append(val)
+    except Exception:
+        log.exception("Error when parsing histogram aggregates, skipping")
+        return None
+
+    return result
+
+def get_histogram_percentiles(configstr=None):
+    if configstr is None:
+        return None
+
+    result = []
+    try:
+        vals = configstr.split(',')
+        for val in vals:
+            try:
+                val = val.strip()
+                floatval = float(val)
+                if floatval <= 0 or floatval >= 1:
+                    raise ValueError
+                if len(val) > 4:
+                    log.warning("Histogram percentile {0} will be rounded to 2 decimal places"\
+                        .format(floatval))
+                result.append(float(val[0:4]))
+            except ValueError:
+                log.warning("Bad histogram percentile value {0}, must be a float strictly between 0 and 1, skipping"\
+                    .format(val))
+    except Exception:
+        log.exception("Error when parsing histogram percentiles, skipping")
+        return None
+
+    return result
+
 def get_config(parse_args=True, cfg_path=None, options=None):
     if parse_args:
         options, _ = get_parsed_args()
@@ -325,6 +374,13 @@ def get_config(parse_args=True, cfg_path=None, options=None):
         except Exception:
             pass
 
+    # Custom histogram aggregate/percentile metrics
+    if config.has_option('Main', 'histogram_aggregates'):
+        agentConfig['histogram_aggregates'] = get_histogram_aggregates(config.get('Main', 'histogram_aggregates'))
+
+    if config.has_option('Main', 'histogram_percentiles'):
+        agentConfig['histogram_percentiles'] = get_histogram_percentiles(config.get('Main', 'histogram_percentiles'))
+
     # Disable Watchdog (optionally)
     if config.has_option('Main', 'watchdog'):
         if config.get('Main', 'watchdog').lower() in ('no', 'false'):
diff --git a/datadog.conf.example b/datadog.conf.example
index 4d05d7f607..094057d694 100644
--- a/datadog.conf.example
+++ b/datadog.conf.example
@@ -76,6 +76,9 @@ use_mount: no
 # for instance for security reasons
 # exclude_process_args: no
 
+# 
histogram_aggregates: max, median, avg, count +# histogram_percentiles: 0.95 + # ========================================================================== # # DogStatsd configuration # # ========================================================================== # diff --git a/dogstatsd.py b/dogstatsd.py index 8dc676d086..c48c92637e 100755 --- a/dogstatsd.py +++ b/dogstatsd.py @@ -367,7 +367,7 @@ def init(config_path=None, use_watchdog=False, use_forwarder=False, args=None): sleep(4) sys.exit(0) - log.debug("Configurating dogstatsd") + log.debug("Configuring dogstatsd") port = c['dogstatsd_port'] interval = DOGSTATSD_FLUSH_INTERVAL @@ -393,8 +393,10 @@ def init(config_path=None, use_watchdog=False, use_forwarder=False, args=None): hostname, aggregator_interval, recent_point_threshold=recent_point_threshold, - formatter = get_formatter(c) - ) + formatter=get_formatter(c), + histogram_aggregates=c.get('histogram_aggregates'), + histogram_percentiles=c.get('histogram_percentiles'), + ) # Start the reporting thread. reporter = Reporter(interval, aggregator, target, api_key, use_watchdog, event_chunk_size) diff --git a/tests/test_bucket_aggregator.py b/tests/test_bucket_aggregator.py index 73aaec0601..141e39a164 100644 --- a/tests/test_bucket_aggregator.py +++ b/tests/test_bucket_aggregator.py @@ -5,6 +5,7 @@ import unittest import nose.tools as nt +from aggregator import DEFAULT_HISTOGRAM_AGGREGATES from dogstatsd import MetricsBucketAggregator @@ -64,7 +65,9 @@ def test_counter_normalization(self): def test_histogram_normalization(self): ag_interval = 10 - stats = MetricsBucketAggregator('myhost', interval=ag_interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', interval=ag_interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']) for i in range(5): stats.submit_packets('h1:1|h') for i in range(20): @@ -538,7 +541,9 @@ def test_gauge_sample_rate(self): def test_histogram(self): ag_interval = self.interval - stats = MetricsBucketAggregator('myhost', interval=ag_interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', interval=ag_interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']) self.wait_for_bucket_boundary(ag_interval) # Sample all numbers between 1-100 many times. This @@ -572,7 +577,11 @@ def test_histogram(self): def test_sampled_histogram(self): # Submit a sampled histogram. - stats = MetricsBucketAggregator('myhost', interval=self.interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', + interval=self.interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) stats.submit_packets('sampled.hist:5|h|@0.5') @@ -587,7 +596,9 @@ def test_sampled_histogram(self): def test_histogram_buckets(self): ag_interval = 1 - stats = MetricsBucketAggregator('myhost', interval=ag_interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', interval=ag_interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']) # Sample all numbers between 1-100 many times. This # means our percentiles should be relatively close to themselves. 
@@ -642,7 +653,9 @@ def test_histogram_buckets(self): def test_histogram_flush_during_bucket(self): ag_interval = 1 - stats = MetricsBucketAggregator('myhost', interval=ag_interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', interval=ag_interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']) # Sample all numbers between 1-100 many times. This # means our percentiles should be relatively close to themselves. @@ -741,7 +754,10 @@ def test_metrics_expiry(self): # Ensure metrics eventually expire and stop submitting. ag_interval = self.interval expiry = ag_interval * 5 + 2 - stats = MetricsBucketAggregator('myhost', interval=ag_interval, expiry_seconds=expiry) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', interval=ag_interval, + expiry_seconds=expiry, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min']) stats.submit_packets('test.counter:123|c') stats.submit_packets('test.gauge:55|g') stats.submit_packets('test.set:44|s') @@ -942,7 +958,12 @@ def test_event_text(self): def test_recent_point_threshold(self): ag_interval = 1 threshold = 100 - stats = MetricsBucketAggregator('myhost', recent_point_threshold=threshold, interval=ag_interval) + # The min is not enabled by default + stats = MetricsBucketAggregator('myhost', + recent_point_threshold=threshold, + interval=ag_interval, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) timestamp_beyond_threshold = time.time() - threshold*2 # Ensure that old gauges get dropped due to old timestamps diff --git a/tests/test_dogstatsd.py b/tests/test_dogstatsd.py index de132bcfe1..c2cabb4b15 100644 --- a/tests/test_dogstatsd.py +++ b/tests/test_dogstatsd.py @@ -5,7 +5,7 @@ import unittest import nose.tools as nt -from aggregator import MetricsAggregator, get_formatter +from aggregator import MetricsAggregator, get_formatter, DEFAULT_HISTOGRAM_AGGREGATES class TestUnitDogStatsd(unittest.TestCase): @@ -73,7 +73,11 @@ def test_counter_normalization(self): nt.assert_equal(intc['host'], 'myhost') def test_histogram_normalization(self): - stats = MetricsAggregator('myhost', interval=10) + # The min is not enabled by default + stats = MetricsAggregator('myhost', + interval=10, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) for i in range(5): stats.submit_packets('h1:1|h') for i in range(20): @@ -324,7 +328,10 @@ def test_gauge_sample_rate(self): nt.assert_equal(m['points'][0][1], 10) def test_histogram(self): - stats = MetricsAggregator('myhost') + # The min is not enabled by default + stats = MetricsAggregator('myhost', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) # Sample all numbers between 1-100 many times. This # means our percentiles should be relatively close to themselves. @@ -356,7 +363,10 @@ def test_histogram(self): def test_sampled_histogram(self): # Submit a sampled histogram. 
- stats = MetricsAggregator('myhost') + # The min is not enabled by default + stats = MetricsAggregator('myhost', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) stats.submit_packets('sampled.hist:5|h|@0.5') @@ -386,10 +396,15 @@ def test_batch_submission(self): assert gauge['points'][0][1] == 1 def test_monokey_batching_notags(self): - stats = MetricsAggregator('host') + # The min is not enabled by default + stats = MetricsAggregator('host', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) stats.submit_packets('test_hist:0.3|ms:2.5|ms|@0.5:3|ms') - stats_ref = MetricsAggregator('host') + stats_ref = MetricsAggregator('host', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) packets = [ 'test_hist:0.3|ms', 'test_hist:2.5|ms|@0.5', @@ -428,10 +443,15 @@ def test_monokey_batching_withtags(self): def test_monokey_batching_withtags_with_sampling(self): - stats = MetricsAggregator('host') + # The min is not enabled by default + stats = MetricsAggregator('host', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) stats.submit_packets('test_metric:1.5|c|#tag1:one,tag2:two:2.3|g|#tag3:three:3|g:42|h|#tag1:12,tag42:42|@0.22') - stats_ref = MetricsAggregator('host') + stats_ref = MetricsAggregator('host', + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) packets = [ 'test_metric:1.5|c|#tag1:one,tag2:two', 'test_metric:2.3|g|#tag3:three', @@ -478,7 +498,12 @@ def test_metrics_expiry(self): # Ensure metrics eventually expire and stop submitting. ag_interval = 1 expiry = ag_interval * 4 + 2 - stats = MetricsAggregator('myhost', interval=ag_interval, expiry_seconds=expiry) + # The min is not enabled by default + stats = MetricsAggregator('myhost', + interval=ag_interval, + expiry_seconds=expiry, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) stats.submit_packets('test.counter:123|c') stats.submit_packets('test.gauge:55|g') stats.submit_packets('test.set:44|s') @@ -636,7 +661,11 @@ def test_event_text(self): def test_recent_point_threshold(self): threshold = 100 - stats = MetricsAggregator('myhost', recent_point_threshold=threshold) + # The min is not enabled by default + stats = MetricsAggregator('myhost', + recent_point_threshold=threshold, + histogram_aggregates=DEFAULT_HISTOGRAM_AGGREGATES+['min'] + ) timestamp_beyond_threshold = time.time() - threshold*2 timestamp_within_threshold = time.time() - threshold/2 diff --git a/tests/test_go_expvar.py b/tests/test_go_expvar.py index 15bd6335a3..fc22151613 100644 --- a/tests/test_go_expvar.py +++ b/tests/test_go_expvar.py @@ -55,7 +55,7 @@ def testGoExpVar(self): metrics = self.check.get_metrics() # The rate is not here so only 2 - self.assertEqual(len(metrics), 16, metrics) + self.assertEqual(len(metrics), 15, metrics) self._assert_metric_number(metrics, 'go_expvar.gauge1', 1) self._assert_metric_number(metrics, 'go_expvar.memstats.by_size.1.mallocs', 1) diff --git a/tests/test_histogram.py b/tests/test_histogram.py new file mode 100644 index 0000000000..4deea174df --- /dev/null +++ b/tests/test_histogram.py @@ -0,0 +1,143 @@ +import unittest + +from aggregator import MetricsAggregator, Histogram +from config import get_histogram_aggregates, get_histogram_percentiles + +class TestHistogram(unittest.TestCase): + def test_default(self): + stats = MetricsAggregator('myhost') + + for i in xrange(20): + stats.submit_packets('myhistogram:{0}|h'.format(i)) + + metrics = stats.flush() + + self.assertEquals(len(metrics), 5, metrics) + + value_by_type = {} + for k in metrics: + 
value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1] + + self.assertEquals(sorted(value_by_type.keys()), + ['95percentile', 'avg', 'count', 'max', 'median'], value_by_type) + + self.assertEquals(value_by_type['max'], 19, value_by_type) + self.assertEquals(value_by_type['median'], 9, value_by_type) + self.assertEquals(value_by_type['avg'], 9.5, value_by_type) + self.assertEquals(value_by_type['count'], 20.0, value_by_type) + self.assertEquals(value_by_type['95percentile'], 18, value_by_type) + + def test_custom_single_percentile(self): + configstr = '0.40' + stats = MetricsAggregator('myhost', + histogram_percentiles=get_histogram_percentiles(configstr) + ) + + self.assertEquals( + stats.metric_config[Histogram]['percentiles'], + [0.40], + stats.metric_config[Histogram] + ) + + for i in xrange(20): + stats.submit_packets('myhistogram:{0}|h'.format(i)) + + metrics = stats.flush() + + self.assertEquals(len(metrics), 5, metrics) + + value_by_type = {} + for k in metrics: + value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1] + + self.assertEquals(value_by_type['40percentile'], 7, value_by_type) + + def test_custom_multiple_percentile(self): + configstr = '0.4, 0.65, 0.999' + stats = MetricsAggregator('myhost', + histogram_percentiles=get_histogram_percentiles(configstr) + ) + + self.assertEquals( + stats.metric_config[Histogram]['percentiles'], + [0.4, 0.65, 0.99], + stats.metric_config[Histogram] + ) + + for i in xrange(20): + stats.submit_packets('myhistogram:{0}|h'.format(i)) + + metrics = stats.flush() + + self.assertEquals(len(metrics), 7, metrics) + + value_by_type = {} + for k in metrics: + value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1] + + self.assertEquals(value_by_type['40percentile'], 7, value_by_type) + self.assertEquals(value_by_type['65percentile'], 12, value_by_type) + self.assertEquals(value_by_type['99percentile'], 19, value_by_type) + + def test_custom_invalid_percentile(self): + configstr = '1.2342' + stats = MetricsAggregator('myhost', + histogram_percentiles=get_histogram_percentiles(configstr) + ) + + self.assertEquals( + stats.metric_config[Histogram]['percentiles'], + [], + stats.metric_config[Histogram] + ) + + def test_custom_invalid_percentile2(self): + configstr = 'aoeuoeu' + stats = MetricsAggregator('myhost', + histogram_percentiles=get_histogram_percentiles(configstr) + ) + + self.assertEquals( + stats.metric_config[Histogram]['percentiles'], + [], + stats.metric_config[Histogram] + ) + + def test_custom_invalid_percentile3skip(self): + configstr = 'aoeuoeu, 2.23, 0.8, 23' + stats = MetricsAggregator('myhost', + histogram_percentiles=get_histogram_percentiles(configstr) + ) + + self.assertEquals( + stats.metric_config[Histogram]['percentiles'], + [0.8], + stats.metric_config[Histogram] + ) + + def test_custom_aggregate(self): + configstr = 'median, max' + stats = MetricsAggregator('myhost', + histogram_aggregates=get_histogram_aggregates(configstr) + ) + + self.assertEquals( + sorted(stats.metric_config[Histogram]['aggregates']), + ['max', 'median'], + stats.metric_config[Histogram] + ) + + for i in xrange(20): + stats.submit_packets('myhistogram:{0}|h'.format(i)) + + metrics = stats.flush() + + self.assertEquals(len(metrics), 3, metrics) + + value_by_type = {} + for k in metrics: + value_by_type[k['metric'][len('myhistogram')+1:]] = k['points'][0][1] + + self.assertEquals(value_by_type['median'], 9, value_by_type) + self.assertEquals(value_by_type['max'], 19, value_by_type) + 
self.assertEquals(value_by_type['95percentile'], 18, value_by_type)
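
---

Editor's note — usage sketch, not part of the patches above. The snippet below
shows how the pieces introduced in this series are meant to fit together: the
config helpers added in PATCH 4/4 parse the raw option strings, and the result
is handed to MetricsAggregator (the same wiring config.py and dogstatsd.py now
do). Only names that appear in the patches are used; the option values and the
metric name are invented for illustration, and a Python 2 environment is
assumed, as in the repo's tests.

    from aggregator import MetricsAggregator
    from config import get_histogram_aggregates, get_histogram_percentiles

    # Parse the raw strings the way config.py does for the new datadog.conf
    # options; invalid entries are dropped with a warning.
    aggregates = get_histogram_aggregates('median, max, min')  # ['median', 'max', 'min']
    percentiles = get_histogram_percentiles('0.5, 0.95')       # [0.5, 0.95]

    stats = MetricsAggregator(
        'myhost',
        histogram_aggregates=aggregates,
        histogram_percentiles=percentiles,
    )

    for i in xrange(20):
        stats.submit_packets('my.hist:{0}|h'.format(i))

    # Each flush now emits my.hist.median, my.hist.max, my.hist.min,
    # my.hist.50percentile and my.hist.95percentile (the percentile suffix
    # pattern matches what the tests assert, e.g. '40percentile' for 0.40).
    print sorted(m['metric'] for m in stats.flush())

In datadog.conf the equivalent would be the two commented-out [Main] options
added to datadog.conf.example:

    histogram_aggregates: median, max, min
    histogram_percentiles: 0.5, 0.95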