diff --git a/.travis.yml b/.travis.yml index 88233dc00..a1e1bbcc2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,6 +40,7 @@ env: - TRAVIS_FLAVOR=snmpwalk - TRAVIS_FLAVOR=hbase_master - TRAVIS_FLAVOR=hbase_regionserver + - TRAVIS_FLAVOR=traefik # END OF TRAVIS MATRIX before_install: diff --git a/circle.yml b/circle.yml index 244236f07..169e8df6d 100644 --- a/circle.yml +++ b/circle.yml @@ -42,6 +42,7 @@ test: - rake ci:run[snmpwalk] - rake ci:run[hbase_master] - rake ci:run[hbase_regionserver] + - rake ci:run[traefik] - bundle exec rake requirements post: - if [[ $(docker ps -a -q) ]]; then docker stop $(docker ps -a -q); fi diff --git a/traefik/README.md b/traefik/README.md new file mode 100644 index 000000000..3f7331140 --- /dev/null +++ b/traefik/README.md @@ -0,0 +1,31 @@ +# Traefik Integration + +## Overview + +Get metrics from Traefik in real time to: + +* Visualize and monitor requests and errors served by Traefik + +## Installation + +Install the `dd-check-traefik` package manually or with your favorite configuration manager. + +## Configuration + +Edit the `traefik.yaml` file to set `prometheus_endpoint` to the metrics URL of your Traefik server. Alternatively, you can set up [auto discovery](http://docs.datadoghq.com/guides/autodiscovery/) to configure the check automatically. 
+ +## Validation + +When you run `datadog-agent info` you should see something like the following: + + Checks + ====== + + traefik + ----------- + - instance #0 [OK] + - Collected 7 metrics, 0 events & 0 service checks + +## Compatibility + +The traefik check is compatible with all major platforms diff --git a/traefik/check.py b/traefik/check.py new file mode 100644 index 000000000..89ca31eea --- /dev/null +++ b/traefik/check.py @@ -0,0 +1,32 @@ +from checks import CheckException +from checks.prometheus_check import PrometheusCheck + +EVENT_TYPE = SOURCE_TYPE_NAME = 'traefik' + +class TraefikCheck(PrometheusCheck): + """ + Collect traefik metrics from Prometheus + """ + def __init__(self, name, init_config, agentConfig, instances=None): + super(TraefikCheck, self).__init__(name, init_config, agentConfig, instances) + self.NAMESPACE = 'traefik' + + self.metrics_mapper = { + 'traefik_request_duration_seconds': 'request.duration', + 'traefik_requests_total': 'requests.total', + } + + + def check(self, instance): + endpoint = instance.get('prometheus_endpoint') + if endpoint is None: + raise CheckException("Unable to find prometheus_endpoint in config file.") + + send_buckets = instance.get('send_histograms_buckets', True) + # By default we send the buckets. + if send_buckets is not None and str(send_buckets).lower() == 'false': + send_buckets = False + else: + send_buckets = True + + self.process(endpoint, send_histograms_buckets=send_buckets, instance=instance) diff --git a/traefik/ci/metrics.txt b/traefik/ci/metrics.txt new file mode 100644 index 000000000..e1e9da4f5 --- /dev/null +++ b/traefik/ci/metrics.txt @@ -0,0 +1,111 @@ +# HELP go_gc_duration_seconds A summary of the GC invocation durations. 
+# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 6.0286000000000005e-05 +go_gc_duration_seconds{quantile="0.25"} 0.000190742 +go_gc_duration_seconds{quantile="0.5"} 0.000552204 +go_gc_duration_seconds{quantile="0.75"} 0.004004719 +go_gc_duration_seconds{quantile="1"} 0.014453783000000001 +go_gc_duration_seconds_sum 0.065373075 +go_gc_duration_seconds_count 26 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 18 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 4.681872e+06 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. +# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 3.5687832e+07 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 1.455756e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 312029 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 616448 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 4.681872e+06 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 4.726784e+06 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 7.397376e+06 +# HELP go_memstats_heap_objects Number of allocated objects. 
+# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 32329 +# HELP go_memstats_heap_released_bytes_total Total number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes_total counter +go_memstats_heap_released_bytes_total 4.726784e+06 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 1.212416e+07 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.4979863897165866e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 55 +# HELP go_memstats_mallocs_total Total number of mallocs. +# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 344358 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 2400 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 16384 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 117280 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 196608 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 9.020091e+06 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. 
+# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 608876 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 458752 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 458752 +# HELP go_memstats_sys_bytes Number of bytes obtained by system. Sum of all system allocations. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 1.5476984e+07 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 0.76 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 524288 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 10 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 2.0475904e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.49798469346e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 5.7290752e+07 +# HELP traefik_request_duration_seconds How long it took to process the request. 
+# TYPE traefik_request_duration_seconds histogram +traefik_request_duration_seconds_bucket{service="http",le="0.1"} 1 +traefik_request_duration_seconds_bucket{service="http",le="0.3"} 1 +traefik_request_duration_seconds_bucket{service="http",le="1.2"} 1 +traefik_request_duration_seconds_bucket{service="http",le="5"} 1 +traefik_request_duration_seconds_bucket{service="http",le="+Inf"} 1 +traefik_request_duration_seconds_sum{service="http"} 0.000150557 +traefik_request_duration_seconds_count{service="http"} 1 +# HELP traefik_requests_total How many HTTP requests processed, partitioned by status code and method. +# TYPE traefik_requests_total counter +traefik_requests_total{code="404",method="GET",service="http"} 1 diff --git a/traefik/ci/traefik.rake b/traefik/ci/traefik.rake new file mode 100644 index 000000000..4b5868551 --- /dev/null +++ b/traefik/ci/traefik.rake @@ -0,0 +1,63 @@ +require 'ci/common' + +def traefik_version + ENV['FLAVOR_VERSION'] || 'latest' +end + +def traefik_rootdir + "#{ENV['INTEGRATIONS_DIR']}/traefik_#{traefik_version}" +end + +namespace :ci do + namespace :traefik do |flavor| + task before_install: ['ci:common:before_install'] + + task install: ['ci:common:install'] do + use_venv = in_venv + install_requirements('traefik/requirements.txt', + "--cache-dir #{ENV['PIP_CACHE']}", + "#{ENV['VOLATILE_DIR']}/ci.log", use_venv) + # sample docker usage + # sh %(docker run -d --name traefik -p 8080:8080 -p 80:80 -v $PWD/traefik.toml:/etc/traefik/traefik.toml traefik) + end + + task before_script: ['ci:common:before_script'] + + task script: ['ci:common:script'] do + this_provides = [ + 'traefik' + ] + Rake::Task['ci:common:run_tests'].invoke(this_provides) + end + + task before_cache: ['ci:common:before_cache'] + + task cleanup: ['ci:common:cleanup'] + # sample cleanup task + # task cleanup: ['ci:common:cleanup'] do + # sh %(docker stop traefik) + # sh %(docker rm traefik) + # end + + task :execute do + exception = nil + begin + %w(before_install 
install before_script).each do |u| + Rake::Task["#{flavor.scope.path}:#{u}"].invoke + end + Rake::Task["#{flavor.scope.path}:script"].invoke + Rake::Task["#{flavor.scope.path}:before_cache"].invoke + rescue => e + exception = e + puts "Failed task: #{e.class} #{e.message}".red + end + if ENV['SKIP_CLEANUP'] + puts 'Skipping cleanup, disposable environments are great'.yellow + else + puts 'Cleaning up' + Rake::Task["#{flavor.scope.path}:cleanup"].invoke + end + raise exception if exception + end + end +end diff --git a/traefik/conf.yaml.example b/traefik/conf.yaml.example new file mode 100644 index 000000000..7b92e2239 --- /dev/null +++ b/traefik/conf.yaml.example @@ -0,0 +1,16 @@ +init_config: + +instances: + # url of the metrics endpoint of prometheus + - prometheus_endpoint: http://172.17.0.2:80/metrics + # The histogram buckets can be noisy and generate a lot of tags. + # send_histograms_buckets controls whether or not you want to pull them. + # + # send_histograms_buckets: True + # + # Note that like all checks based on the PrometheusCheck class, you can add + # tags to the instance that will be added to all the metrics of this check + # instance. 
+ # + # tags: + # - 'mytag1:myValue1' diff --git a/traefik/manifest.csv b/traefik/manifest.csv new file mode 100644 index 000000000..b0ab83d18 --- /dev/null +++ b/traefik/manifest.csv @@ -0,0 +1,11 @@ +{ + "maintainer": "haissam@datadoghq.com", + "manifest_version": "0.1.0", + "max_agent_version": "6.0.0", + "min_agent_version": "5.13.2", + "name": "traefik", + "short_description": "Traefik is a modern HTTP reverse proxy and load balancer made to deploy microservices with ease.", + "support": "contrib", + "supported_os": ["linux"], + "version": "0.1.0" +} diff --git a/traefik/metadata.csv b/traefik/metadata.csv new file mode 100644 index 000000000..fa7f29fda --- /dev/null +++ b/traefik/metadata.csv @@ -0,0 +1,4 @@ +metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name +traefik.request.duration.count,gauge,,request,,count of requests processed in less than $upper_bound seconds,0,traefik,request duration count +traefik.request.duration.sum,gauge,,request,,sum of requests processed in less than $upper_bound seconds,0,traefik,request duration sum +traefik.requests.total,gauge,,request,,number of HTTP requests processed tagged by status code and method,0,traefik,request count diff --git a/traefik/requirements.txt b/traefik/requirements.txt new file mode 100644 index 000000000..f2ceb610a --- /dev/null +++ b/traefik/requirements.txt @@ -0,0 +1,2 @@ +# integration pip requirements +protobuf==3.1.0 diff --git a/traefik/test_traefik.py b/traefik/test_traefik.py new file mode 100644 index 000000000..bfc4342e5 --- /dev/null +++ b/traefik/test_traefik.py @@ -0,0 +1,41 @@ +# (C) Datadog, Inc. 
2010-2017 +# All rights reserved +# Licensed under Simplified BSD License (see LICENSE) + +# stdlib +import os +from mock import MagicMock +from nose.plugins.attrib import attr + +# project +from tests.checks.common import AgentCheckTest + + +@attr(requires='traefik') +class TestTraefik(AgentCheckTest): + CHECK_NAME = "traefik" + NAMESPACE = "traefik" + METRICS = [ + NAMESPACE + '.request.duration.count', + NAMESPACE + '.request.duration.sum', + NAMESPACE + '.requests.total', + ] + + def test_check(self): + instance = { + 'prometheus_endpoint': 'http://localhost/metrics', + } + + content_type = 'text/plain; version=0.0.4' + f_name = os.path.join(os.path.dirname(__file__), 'ci', 'metrics.txt') + with open(f_name, 'r') as f: + bin_data = f.read() + mocks = { + 'poll': MagicMock(return_value=[content_type, bin_data]) + } + self.run_check({'instances': [instance]}, mocks=mocks) + + for metric in self.METRICS: + self.assertMetric(metric) + + self.coverage_report()