Skip to content

Commit

Permalink
Merge pull request BrightcoveOS#642 from rtoma/endeca
Browse files Browse the repository at this point in the history
feat(endecadgraph): new collector
  • Loading branch information
kormoc committed Aug 5, 2014
2 parents e56ffd1 + 7ba1ff6 commit 94b9525
Show file tree
Hide file tree
Showing 4 changed files with 39,104 additions and 0 deletions.
124 changes: 124 additions & 0 deletions src/collectors/endecadgraph/endecadgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# coding=utf-8

"""
Collects stats from Endeca Dgraph/MDEX server.
Tested with: Endeca Information Access Platform version 6.3.0.655584
=== Authors
Jan van Bemmelen <jvanbemmelen@bol.com>
Renzo Toma <rtoma@bol.com>
"""

import diamond.collector
import urllib2
from StringIO import StringIO
import re
import sys
if sys.version_info >= (2, 5):
import xml.etree.cElementTree as ElementTree
ElementTree # workaround for pyflakes issue #13
else:
import cElementTree as ElementTree


class EndecaDgraphCollector(diamond.collector.Collector):

# ignore these elements, because they are of no use
IGNORE_ELEMENTS = [
'most_expensive_queries',
'general_information',
'analytics_performance',
'disk_usage',
'configupdates',
'xqueryconfigupdates',
'spelling_updates',
'precomputed_sorts',
'analytics_performance',
'cache_slices',
]

# ignore these metrics, because they can be generated by graphite
IGNORE_STATS = [
'name',
'units',
]

# set of regular expressions for matching & sub'ing.
NUMVAL_MATCH = re.compile('^[\d\.e\-\+]*$')
CHAR_BLACKLIST = re.compile('\-|\ |,|:|/|>|\(|\)')
UNDERSCORE_UNDUPE = re.compile('_+')

# endeca xml namespace
XML_NS = '{http://xmlns.endeca.com/ene/dgraph}'

def get_default_config_help(self):
config_help = super(EndecaDgraphCollector, self).get_default_config_help()
config_help.update({
'host': "Hostname of Endeca Dgraph instance",
'port': "Port of the Dgraph API listener",
'timeout': "Timeout for http API calls",
})
return config_help

def get_default_config(self):
"""
Returns the default collector settings
"""
config = super(EndecaDgraphCollector, self).get_default_config()
config.update({
'path': 'endeca.dgraph',
'host': 'localhost',
'port': 8080,
'timeout': 1,
})
return config

def collect(self):

def makeSane(stat):
stat = self.CHAR_BLACKLIST.sub('_', stat.lower())
stat = self.UNDERSCORE_UNDUPE.sub('_', stat)
return stat

def createKey(element):
if element.attrib.get("name"):
key = element.attrib.get("name")
key = makeSane(key)
else:
key = element.tag[len(self.XML_NS):]
return key

def processElem(elem, keyList):
for k, v in elem.items():
prefix = '.'.join(keyList)
if k not in self.IGNORE_ELEMENTS and self.NUMVAL_MATCH.match(v):
k = makeSane(k)
self.publish('%s.%s' % (prefix, k), v)

def walkXML(context, elemList):
try:
for event, elem in context:
elemName = createKey(elem)
if event == 'start':
elemList.append(elemName)
if len(elem) == 0:
if set(elemList).intersection(self.IGNORE_ELEMENTS):
continue
processElem(elem, elemList)
elif event == 'end':
elemList.pop()
except Exception, e:
self.log.error('Something went wrong: %s', e)

url = 'http://%s:%d/admin?op=stats' % (self.config['host'], self.config['port'])
try:
xml = urllib2.urlopen(url, timeout=self.config['timeout']).read()
except Exception, e:
self.log.error('Could not connect to endeca on %s: %s' % (url, e))
return {}

context = ElementTree.iterparse(StringIO(xml), events=('start', 'end'))
elemList = []
walkXML(context, elemList)
Loading

0 comments on commit 94b9525

Please sign in to comment.