Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

local checks in /etc/dd-agent/checks.d/ #413

Merged
merged 14 commits into from
Mar 19, 2013
87 changes: 40 additions & 47 deletions checks/build.py → checks.d/jenkins.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import re
import socket
import time
import traceback

try:
from collections import defaultdict
Expand All @@ -18,16 +15,14 @@
from elementtree import ElementTree

from util import get_hostname
from checks import AgentCheck

class Continue(Exception):
pass

class Hudson(object):
key = 'Hudson'
class Jenkins(AgentCheck):
datetime_format = '%Y-%m-%d_%H-%M-%S'

def __init__(self):
self.high_watermarks = None
def __init__(self, name, init_config, agentConfig):
AgentCheck.__init__(self, name, init_config, agentConfig)
self.high_watermarks = {}

def _extract_timestamp(self, dir_name):
try:
Expand All @@ -39,11 +34,12 @@ def _extract_timestamp(self, dir_name):
raise Exception("Error with build directory name, not a parsable date: %s" % (dir_name))

def _get_build_metadata(self, dir_name):
# Read the build.xml metadata file that Hudson generates
# Read the build.xml metadata file that Jenkins generates
build_metadata = os.path.join(dir_name, 'build.xml')

if not os.access(build_metadata, os.R_OK):
raise Continue("Can't read build file at %s" % (build_metadata))
self.log.debug("Can't read build file at %s" % (build_metadata))
raise Exception("Can't read build file at %s" % (build_metadata))
else:
tree = ElementTree()
tree.parse(build_metadata)
Expand All @@ -56,7 +52,7 @@ def _get_build_metadata(self, dir_name):
if v is not None])
return d

def _get_build_results(self, logger, job_dir):
def _get_build_results(self, instance_key, job_dir):
job_name = os.path.basename(job_dir)

try:
Expand All @@ -68,11 +64,11 @@ def _get_build_results(self, logger, job_dir):
dir_name = dirs[index]
timestamp = self._extract_timestamp(dir_name)
# Check if it's a new build
if timestamp > self.high_watermarks[job_name]:
if timestamp > self.high_watermarks[instance_key][job_name]:
# If we can't get build metadata, we try the previous one
try:
build_metadata = self._get_build_metadata(dir_name)
except:
except Exception:
continue

output = {
Expand All @@ -81,54 +77,51 @@ def _get_build_results(self, logger, job_dir):
'event_type': 'build result'
}
output.update(build_metadata)
self.high_watermarks[job_name] = timestamp
self.high_watermarks[instance_key][job_name] = timestamp
yield output
# If it is not a new build, stop here
else:
break
except Exception, e:
log.error("Error while working on job %s, exception: %s" % (job_name, e))
self.log.error("Error while working on job %s, exception: %s" % (job_name, e))

def check(self, logger, agentConfig):
if self.high_watermarks is None:
def check(self, instance, create_event=True):
if self.high_watermarks.get(instance.get('name'), None) is None:
# On the first run of check(), prime the high_watermarks dict
# so that we only send events that occurred after the agent
# started.
# (Setting high_watermarks in the next statement prevents
# any kind of infinite loop (assuming nothing ever sets
# high_watermarks to None again!))
self.high_watermarks = defaultdict(lambda: 0)
self.check(logger, agentConfig)
self.high_watermarks[instance.get('name')] = defaultdict(lambda: 0)
self.check(instance, create_event=False)

hudson_home = agentConfig.get('hudson_home', None)
jenkins_home = instance.get('jenkins_home', None)

if not hudson_home:
return False
if not jenkins_home:
raise Exception("No jenkins_home directory set in the config file")

job_dirs = glob(os.path.join(hudson_home, 'jobs', '*'))
job_dirs = glob(os.path.join(jenkins_home, 'jobs', '*'))

build_events = []

for job_dir in job_dirs:
for output in self._get_build_results(logger, job_dir):
output['api_key'] = agentConfig['api_key']
output['host'] = get_hostname(agentConfig)
build_events.append(output)

return build_events

if __name__ == '__main__':
import logging
import sys

hudson_home, apiKey = sys.argv[1:3]

logger = logging.getLogger('ddagent.checks.hudson')
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())
hudson = Hudson()
while True:
print hudson.check(logger,
{'hudson_home': hudson_home,
'api_key': apiKey})
time.sleep(5)
for output in self._get_build_results(instance.get('name'), job_dir):
output['api_key'] = self.agentConfig['api_key']
output['host'] = get_hostname(self.agentConfig)
if create_event:
self.log.debug("Creating event for job: %s" % output['job_name'])
self.event(output)

@staticmethod
def parse_agent_config(agentConfig):
    """Translate the legacy agent-wide setting into a check configuration.

    Reads the ``hudson_home`` key of ``agentConfig``. When it is missing or
    empty, returns ``False``. Otherwise returns a dict with a single
    instance named ``default`` whose ``jenkins_home`` is that value.
    """
    legacy_home = agentConfig.get('hudson_home')
    if legacy_home:
        default_instance = {
            'name': 'default',
            'jenkins_home': legacy_home,
        }
        return {'instances': [default_instance]}

    # No legacy setting: nothing to convert.
    return False

68 changes: 63 additions & 5 deletions checks.d/lighttpd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,55 @@
class Lighttpd(AgentCheck):
"""Tracks basic connection/requests/workers metrics

See http://redmine.lighttpd.net/projects/1/wiki/Docs_ModStatus for more details
See http://redmine.lighttpd.net/projects/1/wiki/Docs_ModStatus for Lighttpd details
See http://redmine.lighttpd.net/projects/lighttpd2/wiki/Mod_status for Lighttpd2 details
"""

URL_SUFFIX_PER_VERSION = {
1: '?auto',
2: '?format=plain',
'Unknown': '?auto'
}

GAUGES = {
'IdleServers': 'lighttpd.performance.idle_server',
'BusyServers': 'lighttpd.performance.busy_servers',
'Uptime': 'lighttpd.performance.uptime',
'Total kBytes': 'lighttpd.net.bytes',
'Total Accesses': 'lighttpd.net.hits',
'memory_usage': 'lighttpd2.performance.memory_usage',
'requests_avg': 'lighttpd2.net.requests_avg',
'traffic_out_avg': 'lighttpd2.net.bytes_out_avg',
'traffic_in_avg': 'lighttpd2.net.bytes_in_avg',
'connections_avg': 'lighttpd2.net.connections_avg',
'connection_state_start': 'lighttpd2.connections.state_start',
'connection_state_read_header': 'lighttpd2.connections.state_read_header',
'connection_state_handle_request': 'lighttpd2.connections.state_handle_request',
'connection_state_write_response': 'lighttpd2.connections.state_write_response',
'connection_state_keep_alive': 'lighttpd2.connections.state_keep_alive',
'requests_avg_5sec': 'lighttpd2.net.requests_avg_5sec',
'traffic_out_avg_5sec': 'lighttpd2.net.bytes_out_avg_5sec',
'traffic_in_avg_5sec': 'lighttpd2.net.bytes_in_avg_5sec',
'connections_avg_5sec': 'lighttpd2.net.connections_avg_5sec',
}

COUNTERS = {
'requests_abs': 'lighttpd2.net.requests_total',
'traffic_out_abs': 'lighttpd2.net.bytes_out',
'traffic_in_abs': 'lighttpd2.net.bytes_in',
'connections_abs': 'lighttpd2.net.connections_total',
'status_1xx': 'lighttpd2.response.status_1xx',
'status_2xx': 'lighttpd2.response.status_2xx',
'status_3xx': 'lighttpd2.response.status_3xx',
'status_4xx': 'lighttpd2.response.status_4xx',
'status_5xx': 'lighttpd2.response.status_5xx',
}

RATES = {
'Total kBytes': 'lighttpd.net.bytes_per_s',
'Total Accesses': 'lighttpd.net.request_per_s'
}


def __init__(self, name, init_config, agentConfig, instances=None):
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
self.assumed_url = {}
Expand All @@ -33,9 +66,12 @@ def check(self, instance):
url = self.assumed_url.get(instance['lighttpd_status_url'], instance['lighttpd_status_url'])

tags = instance.get('tags', [])
self.log.debug("Connecting to %s" % url)
req = urllib2.Request(url, None,
headers(self.agentConfig))
request = urllib2.urlopen(req)
headers_resp = request.info().headers
server_version = self._get_server_version(headers_resp)
response = request.read()

metric_count = 0
Expand Down Expand Up @@ -65,11 +101,33 @@ def check(self, instance):
metric_name = self.RATES[metric]
self.rate(metric_name, value, tags=tags)

# Send metric as a counter, if applicable
if metric in self.COUNTERS:
metric_count += 1
metric_name = self.COUNTERS[metric]
self.increment(metric_name, value, tags=tags)

if metric_count == 0:
if self.assumed_url.get(instance['lighttpd_status_url'], None) is None and url[-5:] != '?auto':
self.assumed_url[instance['lighttpd_status_url']] = '%s?auto' % url
self.log.debug("Assuming url was not correct. Trying to add ?auto suffix to the url")
url_suffix = self.URL_SUFFIX_PER_VERSION[server_version]
if self.assumed_url.get(instance['lighttpd_status_url'], None) is None and url[-len(url_suffix):] != url_suffix:
self.assumed_url[instance['lighttpd_status_url']] = '%s%s' % (url, url_suffix)
self.log.debug("Assuming url was not correct. Trying to add %s suffix to the url" % url_suffix)
self.check(instance)
else:
raise Exception("No metrics were fetched for this instance. Make sure that %s is the proper url." % instance['lighttpd_status_url'])

def _get_server_version(self, headers):
    """Return the Lighttpd major version advertised by the ``Server`` header.

    ``headers`` is an iterable of raw header lines. The first line that
    contains ``Server:`` decides the result: the single character right
    after the first ``/`` is parsed as the major version number.

    Returns a key of ``URL_SUFFIX_PER_VERSION``: either an int version
    that has a known status-url suffix, or the string ``"Unknown"`` when
    no ``Server:`` line is present, the version cannot be parsed, or the
    parsed version has no suffix entry.
    """
    import sys

    for h in headers:
        if "Server:" not in h:
            continue
        try:
            version = int(h.split('/')[1][0])
        except Exception:
            # sys.exc_info() rather than binding the exception in the
            # except clause, so the block parses on old and new interpreters.
            e = sys.exc_info()[1]
            self.log.debug("Error while trying to get server version %s" % str(e))
            version = "Unknown"
        if version not in self.URL_SUFFIX_PER_VERSION:
            # check() indexes URL_SUFFIX_PER_VERSION with this value; a
            # version without an entry (e.g. "Server: nginx/0.8" -> 0)
            # would raise KeyError there instead of the intended error.
            version = "Unknown"
        self.log.debug("Lighttpd server version is %s" % version)
        return version

    self.log.debug("Lighttpd server version is Unknown")
    return "Unknown"

2 changes: 0 additions & 2 deletions checks/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import checks.system.win32 as w32
from checks.agent_metrics import CollectorMetrics
from checks.nagios import Nagios
from checks.build import Hudson
from checks.db.mysql import MySql
from checks.db.mcache import Memcache
from checks.queue import RabbitMq
Expand Down Expand Up @@ -105,7 +104,6 @@ def __init__(self, agentConfig, emitters, systemStats):
# Event Checks
self._event_checks = [
Nagios(get_hostname()),
Hudson()
]

# Resource Checks
Expand Down
10 changes: 10 additions & 0 deletions conf.d/jenkins.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
init_config:

instances:
# For every instance, you have a `name` and a `jenkins_home`

- name: default
jenkins_home: /var/lib/jenkins

- name: jenkins2
jenkins_home: /var/lib/jenkins2
5 changes: 5 additions & 0 deletions conf.d/lighttpd.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ instances:
- lighttpd_status_url: http://example2.com:1234/server-status?auto
tags:
- instance:bar

# Lighttpd2 status url
- lighttpd_status_url: http://example.com/server-status?format=plain
tags:
- instance:l2
Loading