Skip to content

Commit

Permalink
Merge branch 'master' into quentin/windows-omnibus-5.0
Browse files Browse the repository at this point in the history
* master: (21 commits)
  [Postgres] hard limit on pg count metric (#2575)
  [procfs] make /proc configurable.
  [sqlserver] send service checks at every run (#2515)
  [http_check] SNI support for cert_expiration (#2521)
  [dev-env] DL ez_setup from preferred URL
  Example syntax using a wildcard, recommended by a customer; see ticket https://datadog.zendesk.com/agent/tickets/54475
  [collector] don't send the api_key with resources
  [checks] remove api_key from events payload
  [packaging] 5.9.0 nightlies
  [win32] plays well memory check timeouts
  [rabbitmq] Avoid the extra double lookup for 'rabbitmq_api_url' (#2543)
  [source] archive setuptools
  Added a note on how to get bundler installed
  [kubernetes] disable use_histogram by default (#2542)
  [powerdns_recursor] rename configuration file (#2538)
  [changelog] release 5.8.2
  Revert "Fix get_hostname for windows ec2 instances"
  [changelog] release `5.8.1`
  fixes .yaml.default
  [core] fixes windows conf (#2528)
  ...
  • Loading branch information
degemer committed Jun 7, 2016
2 parents bbe9119 + 60ca25f commit f30225a
Show file tree
Hide file tree
Showing 34 changed files with 226 additions and 113 deletions.
24 changes: 23 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
Changes
=======

# 5.8.2 / 05-24-2016
**Windows only**

### Details
https://github.com/DataDog/dd-agent/compare/5.8.1...5.8.2

### Changes
* [BUGFIX] Windows: Revert a change introduced in the `5.8.0` release, causing hostnames to change on EC2 hosts when no hostname is defined in `datadog.conf`, leading to potential backwards compatibility issues. See [#2451][], [#2535][]


# 5.8.1 / 05-23-2016
**Windows only**

### Details
https://github.com/DataDog/dd-agent/compare/5.8.0...5.8.1

### Changes
* [BUGFIX] Windows: Fix an issue that was preventing checks from loading properly. See [#2528][]


# 5.8.0 / 05-23-2016
**All platforms**

Expand Down Expand Up @@ -3089,6 +3109,8 @@ https://github.com/DataDog/dd-agent/compare/2.2.9...2.2.10
[#2512]: https://github.com/DataDog/dd-agent/issues/2512
[#2514]: https://github.com/DataDog/dd-agent/issues/2514
[#2516]: https://github.com/DataDog/dd-agent/issues/2516
[#2528]: https://github.com/DataDog/dd-agent/issues/2528
[#2535]: https://github.com/DataDog/dd-agent/issues/2535
[#3399]: https://github.com/DataDog/dd-agent/issues/3399
[@AirbornePorcine]: https://github.com/AirbornePorcine
[@AntoCard]: https://github.com/AntoCard
Expand Down Expand Up @@ -3222,4 +3244,4 @@ https://github.com/DataDog/dd-agent/compare/2.2.9...2.2.10
[@xkrt]: https://github.com/xkrt
[@yenif]: https://github.com/yenif
[@yyamano]: https://github.com/yyamano
[@zdannar]: https://github.com/zdannar
[@zdannar]: https://github.com/zdannar
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pull request.

Required:
- python 2.7
- bundler
- bundler (to get it: `gem install bundler`)

```
# Clone the repository
Expand Down
2 changes: 1 addition & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ task 'setup_env' do
`mkdir -p venv`
`wget -O venv/virtualenv.py https://raw.github.com/pypa/virtualenv/1.11.6/virtualenv.py`
`python venv/virtualenv.py --no-site-packages --no-pip --no-setuptools venv/`
`wget -O venv/ez_setup.py https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py`
`wget -O venv/ez_setup.py https://bootstrap.pypa.io/ez_setup.py`
`venv/bin/python venv/ez_setup.py --version="20.9.0"`
`wget -O venv/get-pip.py https://bootstrap.pypa.io/get-pip.py`
`venv/bin/python venv/get-pip.py`
Expand Down
4 changes: 3 additions & 1 deletion agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ def run(self, config=None):

self._agentConfig = self._set_agent_config_hostname(config)
hostname = get_hostname(self._agentConfig)
systemStats = get_system_stats()
systemStats = get_system_stats(
proc_path=self._agentConfig.get('procfs_path', '/proc').rstrip('/')
)
emitters = self._get_emitters()

# Initialize service discovery
Expand Down
1 change: 1 addition & 0 deletions checks.d/agent_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def get_metric_context(self):
return self._collector_payload, self._metric_context

def check(self, instance):

if self.in_developer_mode:
stats, names_to_metric_types = self._psutil_config_to_stats(instance)
self._register_psutil_metrics(stats, names_to_metric_types)
Expand Down
6 changes: 6 additions & 0 deletions checks.d/btrfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

# project
from checks import AgentCheck
from utils.platform import Platform

MIXED = "mixed"
DATA = "data"
Expand Down Expand Up @@ -119,6 +120,11 @@ def get_usage(self, mountpoint):
def check(self, instance):
btrfs_devices = {}
excluded_devices = instance.get('excluded_devices', [])

if Platform.is_linux():
procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
psutil.PROCFS_PATH = procfs_path

for p in psutil.disk_partitions():
if (p.fstype == 'btrfs' and p.device not in btrfs_devices
and p.device not in excluded_devices):
Expand Down
3 changes: 3 additions & 0 deletions checks.d/disk.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def check(self, instance):
# Windows and Mac will always have psutil
# (we have packaged for both of them)
if self._psutil():
if Platform.is_linux():
procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
psutil.PROCFS_PATH = procfs_path
self.collect_metrics_psutil()
else:
# FIXME: implement all_partitions (df -a)
Expand Down
5 changes: 5 additions & 0 deletions checks.d/gunicorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

# project
from checks import AgentCheck
from util import Platform


class GUnicornCheck(AgentCheck):
Expand All @@ -37,6 +38,10 @@ def check(self, instance):
""" Collect metrics for the given gunicorn instance. """
self.log.debug("Running instance: %s", instance)

if Platform.is_linux():
procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
psutil.PROCFS_PATH = procfs_path

# Validate the config.
if not instance or self.PROC_NAME not in instance:
raise GUnicornCheckError("instance must specify: %s" % self.PROC_NAME)
Expand Down
19 changes: 11 additions & 8 deletions checks.d/http_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ def connect(self):

# Wrap socket using verification with the root certs in trusted_root_certs
self.sock = ssl_.ssl_wrap_socket(conn, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=hostname,
ssl_version=resolved_ssl_version,
ciphers=self.ciphers)
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=hostname,
ssl_version=resolved_ssl_version,
ciphers=self.ciphers)

if self.assert_fingerprint:
ssl_.assert_fingerprint(self.sock.getpeercert(binary_form=True), self.assert_fingerprint)
Expand Down Expand Up @@ -243,7 +243,7 @@ def _check(self, instance):
self.log.debug("Weak Ciphers will be used for {0}. Suppoted Cipherlist: {1}".format(
base_addr, WeakCiphersHTTPSConnection.SUPPORTED_CIPHERS))

r = sess.request('GET', addr, auth=auth, timeout=timeout, headers=headers, proxies = instance_proxy,
r = sess.request('GET', addr, auth=auth, timeout=timeout, headers=headers, proxies=instance_proxy,
verify=False if disable_ssl_validation else instance_ca_certs)

except (socket.timeout, requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
Expand Down Expand Up @@ -444,8 +444,11 @@ def check_cert_expiration(self, instance, timeout, instance_ca_certs):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(float(timeout))
sock.connect((host, port))
ssl_sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED,
ca_certs=instance_ca_certs)
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.verify_mode = ssl.CERT_REQUIRED
context.check_hostname = True
context.load_verify_locations(instance_ca_certs)
ssl_sock = context.wrap_socket(sock, server_hostname=host)
cert = ssl_sock.getpeercert()

except Exception as e:
Expand Down
17 changes: 14 additions & 3 deletions checks.d/linux_proc_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,23 @@ def check(self, instance):

prio_counts = defaultdict(int)

with open('/proc/sys/fs/inode-nr', 'r') as inode_info:
proc_location = self.agentConfig.get('procfs_path', '/proc').rstrip('/')

proc_path_map = {
"inode_info": "sys/fs/inode-nr",
"stat_info": "stat",
"entropy_info": "sys/kernel/random/entropy_avail",
}

for key, path in proc_path_map.iteritems():
proc_path_map[key] = "{procfs}/{path}".format(procfs=proc_location, path=path)

with open(proc_path_map['inode_info'], 'r') as inode_info:
inode_stats = inode_info.readline().split()
self.gauge('system.inodes.total', float(inode_stats[0]), tags=tags)
self.gauge('system.inodes.used', float(inode_stats[1]), tags=tags)

with open('/proc/stat', 'r') as stat_info:
with open(proc_path_map['stat_info'], 'r') as stat_info:
lines = [line.strip() for line in stat_info.readlines()]

for line in lines:
Expand All @@ -50,7 +61,7 @@ def check(self, instance):
interrupts = int(line.split(' ')[1])
self.monotonic_count('system.linux.interrupts', interrupts, tags=tags)

with open('/proc/sys/kernel/random/entropy_avail') as entropy_info:
with open(proc_path_map['entropy_info'], 'r') as entropy_info:
entropy = entropy_info.readline()
self.gauge('system.entropy.available', float(entropy), tags=tags)

Expand Down
1 change: 0 additions & 1 deletion checks.d/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,6 @@ def create_event(self, last_state, state, clean_server_name, replset_name, agent
self.event({
'timestamp': int(time.time()),
'source_type_name': self.SOURCE_TYPE_NAME,
'api_key': agentConfig.get('api_key', ''),
'msg_title': msg_title,
'msg_text': msg,
'host': hostname,
Expand Down
6 changes: 6 additions & 0 deletions checks.d/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# project
from config import _is_affirmative
from checks import AgentCheck
from util import Platform

GAUGE = "gauge"
RATE = "rate"
Expand Down Expand Up @@ -288,6 +289,11 @@ def get_library_versions(self):
return {"pymysql": pymysql.__version__}

def check(self, instance):

if Platform.is_linux() and PSUTIL_AVAILABLE:
procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
psutil.PROCFS_PATH = procfs_path

host, port, user, password, mysql_sock, defaults_file, tags, options, queries, ssl = \
self._get_config(instance)

Expand Down
9 changes: 6 additions & 3 deletions checks.d/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def _submit_regexed_values(self, output, regex_list):
self.rate(metric, self._parse_value(value.group(1)))

def _check_linux(self, instance):
proc_location = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
if self._collect_cx_state:
try:
self.log.debug("Using `ss` to collect connection state")
Expand Down Expand Up @@ -186,7 +187,8 @@ def _check_linux(self, instance):
except SubprocessOutputEmptyError:
self.log.exception("Error collecting connection stats.")

proc = open('/proc/net/dev', 'r')
proc_dev_path = "{}/net/dev".format(proc_location)
proc = open(proc_dev_path, 'r')
try:
lines = proc.readlines()
finally:
Expand All @@ -213,7 +215,8 @@ def _check_linux(self, instance):
self._submit_devicemetrics(iface, metrics)

try:
proc = open('/proc/net/snmp', 'r')
proc_snmp_path = "{}/net/snmp".format(proc_location)
proc = open(proc_snmp_path, 'r')

# IP: Forwarding DefaultTTL InReceives InHdrErrors ...
# IP: 2 64 377145470 0 ...
Expand Down Expand Up @@ -271,7 +274,7 @@ def _check_linux(self, instance):

except IOError:
# On Openshift, /proc/net/snmp is only readable by root
self.log.debug("Unable to read /proc/net/snmp.")
self.log.debug("Unable to read %s.", proc_snmp_path)

# Parse the output of the command that retrieves the connection state (either `ss` or `netstat`)
# Returns a dict metric_name -> value
Expand Down
26 changes: 17 additions & 9 deletions checks.d/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from config import _is_affirmative

MAX_CUSTOM_RESULTS = 100
TABLE_COUNT_LIMIT = 200


class ShouldRestartException(Exception):
Expand Down Expand Up @@ -116,7 +117,6 @@ class PostgreSql(AgentCheck):
'relation': False,
}


REL_METRICS = {
'descriptors': [
('relname', 'table'),
Expand Down Expand Up @@ -193,10 +193,14 @@ class PostgreSql(AgentCheck):
},
'relation': False,
'query': """
SELECT schemaname, count(*)
FROM %s
GROUP BY schemaname
"""
SELECT schemaname, count(*) FROM
(
SELECT schemaname
FROM %s
ORDER BY schemaname, relname
LIMIT {table_count_limit}
) AS subquery GROUP BY schemaname
""".format(table_count_limit=TABLE_COUNT_LIMIT)
}

REPLICATION_METRICS_9_1 = {
Expand Down Expand Up @@ -412,7 +416,7 @@ def _build_relations_config(self, yamlconfig):
self.log.warn('Failed to parse config element=%s, check syntax' % str(element))
return config

def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, function_metrics):
def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, function_metrics, count_metrics):
"""Query pg_stat_* for various metrics
If relations is not an empty list, gather per-relation metrics
on top of that.
Expand All @@ -422,12 +426,14 @@ def _collect_stats(self, key, db, instance_tags, relations, custom_metrics, func
metric_scope = [
self.CONNECTION_METRICS,
self.LOCK_METRICS,
self.COUNT_METRICS,
]

if function_metrics:
metric_scope.append(self.FUNCTION_METRICS)

if count_metrics:
metric_scope.append(self.COUNT_METRICS)

# These are added only once per PG server, thus the test
db_instance_metrics = self._get_instance_metrics(key, db)
bgw_instance_metrics = self._get_bgw_metrics(key, db)
Expand Down Expand Up @@ -633,6 +639,8 @@ def check(self, instance):
relations = instance.get('relations', [])
ssl = _is_affirmative(instance.get('ssl', False))
function_metrics = _is_affirmative(instance.get('collect_function_metrics', False))
# Default value for `count_metrics` is True for backward compatibility
count_metrics = _is_affirmative(instance.get('collect_count_metrics', True))

if relations and not dbname:
self.warning('"dbname" parameter must be set when using the "relations" parameter.')
Expand Down Expand Up @@ -665,11 +673,11 @@ def check(self, instance):
db = self.get_connection(key, host, port, user, password, dbname, ssl)
version = self._get_version(key, db)
self.log.debug("Running check against version %s" % version)
self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics)
self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics)
except ShouldRestartException:
self.log.info("Resetting the connection")
db = self.get_connection(key, host, port, user, password, dbname, ssl, use_cached=False)
self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics)
self._collect_stats(key, db, tags, relations, custom_metrics, function_metrics, count_metrics)

if db is not None:
service_check_tags = self._get_service_check_tags(host, port, dbname)
Expand Down
6 changes: 4 additions & 2 deletions checks.d/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ def __init__(self, name, init_config, agentConfig, instances=None):

if Platform.is_linux():
procfs_path = init_config.get('procfs_path')
if procfs_path:
psutil.PROCFS_PATH = procfs_path
if not procfs_path:
procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')

psutil.PROCFS_PATH = procfs_path

# Process cache, indexed by instance
self.process_cache = defaultdict(dict)
Expand Down
8 changes: 3 additions & 5 deletions checks.d/rabbitmq.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,10 @@ def __init__(self, name, init_config, agentConfig, instances=None):
self.already_alerted = []

def _get_config(self, instance):
# make sure 'rabbitmq_api_url; is present
if 'rabbitmq_api_url' not in instance:
# make sure 'rabbitmq_api_url' is present and get parameters
base_url = instance.get('rabbitmq_api_url', None)
if not base_url:
raise Exception('Missing "rabbitmq_api_url" in RabbitMQ config.')

# get parameters
base_url = instance['rabbitmq_api_url']
if not base_url.endswith('/'):
base_url += '/'
username = instance.get('rabbitmq_user', 'guest')
Expand Down
Loading

0 comments on commit f30225a

Please sign in to comment.