# datadog_callback.py
# Forked from DataDog/ansible-datadog-callback.
from __future__ import absolute_import, print_function
import getpass
import logging
import os
import time

# Optional third-party dependencies. When any of them is missing the plugin
# is still importable; CallbackModule.__init__ then marks itself disabled.
try:
    import datadog
    import yaml
    from packaging import version
    HAS_MODULES = True
except ImportError:
    HAS_MODULES = False

import ansible
from ansible.plugins.callback import CallbackBase

# `cli` is only importable when the entry-point module defines it. Fall back
# to None instead of failing the whole plugin import when it does not.
try:
    from __main__ import cli
except ImportError:
    cli = None

# True when the running Ansible is 2.8.0 or newer, in which case the command
# line options are read from ansible.context.CLIARGS instead of cli.options.
ANSIBLE_ABOVE_28 = False
if HAS_MODULES and version.parse(ansible.__version__) >= version.parse('2.8.0'):
    ANSIBLE_ABOVE_28 = True
    from ansible.context import CLIARGS

# API endpoint used when neither a URL nor a site is configured.
DEFAULT_DD_URL = "https://api.datadoghq.com"
class CallbackModule(CallbackBase):
    """Ansible callback plugin that reports playbook activity to Datadog.

    Events are sent when a play starts, when a task changes or fails on a
    host, when a host is unreachable and when the playbook finishes.
    Per-host task counters and the playbook elapsed time are sent as
    metrics prefixed with ``ansible.``.

    The plugin disables itself (``self.disabled = True``) when the required
    python packages are missing or when no API key can be found.
    """

    def __init__(self):
        # NOTE(review): CallbackBase.__init__ is not invoked here, exactly as
        # in the original code - confirm that this is intentional.
        if not HAS_MODULES:
            self.disabled = True
            print('Datadog callback disabled: missing "datadog", "yaml", and/or "packaging" python package.')
        else:
            self.disabled = False
            # Set logger level - datadog api and urllib3
            for log_name in ['requests.packages.urllib3', 'datadog.api']:
                self._set_logger_level(log_name)

        self._playbook_name = None
        # Initialised here so the attribute always exists, even if
        # `v2_playbook_on_start` has not run yet.
        self._inventory_name = ''
        self._start_time = time.time()

        # Command line options: CLIARGS on Ansible >= 2.8, `cli.options`
        # before that. Stays None when neither source is available.
        self._options = None
        if HAS_MODULES:
            if ANSIBLE_ABOVE_28:
                self._options = CLIARGS
            elif cli:
                self._options = cli.options

        # self.playbook is set in the `v2_playbook_on_start` callback method
        self.playbook = None
        # self.play is set in the `v2_playbook_on_play_start` callback method
        self.play = None

    def _set_logger_level(self, name, level=logging.WARNING):
        """Set the level of logger `name` and stop it from propagating."""
        try:
            log = logging.getLogger(name)
            log.setLevel(level)
            log.propagate = False
        except Exception as e:
            # We don't want Ansible to fail because of a logging problem
            print("Couldn't get logger - %s" % name)
            print(e)

    def _load_conf(self, file_path):
        """Load the API key, URL and site from `file_path` and the environment.

        The environment variables DATADOG_API_KEY, DATADOG_URL and
        DATADOG_SITE take precedence over the `api_key`, `url` and `site`
        keys of the YAML file. A missing file, an empty file or a file whose
        top level is not a mapping is treated as an empty configuration.

        Returns a `(api_key, dd_url, dd_site)` tuple; values that are not
        configured come back as empty strings.
        """
        conf_dict = {}
        if os.path.isfile(file_path):
            with open(file_path, 'r') as conf_file:
                # NOTE(review): FullLoader is kept from the original code;
                # yaml.safe_load would be enough for a plain key/value file.
                loaded = yaml.load(conf_file, Loader=yaml.FullLoader)
            # yaml.load returns None for an empty document, and may return a
            # list or scalar: only a mapping is usable as configuration.
            if isinstance(loaded, dict):
                conf_dict = loaded

        # `or ''` turns an explicit null in the YAML file into "not set"
        api_key = os.environ.get('DATADOG_API_KEY', conf_dict.get('api_key') or '')
        dd_url = os.environ.get('DATADOG_URL', conf_dict.get('url') or '')
        dd_site = os.environ.get('DATADOG_SITE', conf_dict.get('site') or '')
        return api_key, dd_url, dd_site

    def _send_event(self, title, alert_type=None, text=None, tags=None, host=None, event_type=None, event_object=None):
        """Send an event to Datadog, adding the default tags.

        Events with `alert_type == 'error'` get priority 'normal', all
        others 'low'. API errors are printed and never raised, so a Datadog
        failure cannot fail the Ansible run.
        """
        # Work on a copy: the caller's list must not be modified
        tags = list(tags) if tags else []
        tags.extend(self.default_tags)
        priority = 'normal' if alert_type == 'error' else 'low'
        try:
            datadog.api.Event.create(
                title=title,
                text=text,
                alert_type=alert_type,
                priority=priority,
                tags=tags,
                host=host,
                source_type_name='ansible',
                event_type=event_type,
                event_object=event_object,
            )
        except Exception as e:
            # We don't want Ansible to fail on an API error
            print('Couldn\'t send event "{0}" to Datadog'.format(title))
            print(e)

    def send_task_event(self, title, alert_type='info', text='', tags=None, host=None):
        """Send an event, aggregated with other task-level events from the same host.

        When a play is known, a `play:<name>` tag is added.
        """
        # Work on a copy: the caller's list must not be modified
        tags = list(tags) if tags else []
        if getattr(self, 'play', None):
            tags.append('play:{0}'.format(self.play.name))
        self._send_event(
            title,
            alert_type=alert_type,
            text=text,
            tags=tags,
            host=host,
            event_type='config_management.task',
            event_object=host,
        )

    def send_playbook_event(self, title, alert_type='info', text='', tags=None, event_type=''):
        """Send an event, aggregated with other playbook-level events from the
        same playbook and of the same type."""
        self._send_event(
            title,
            alert_type=alert_type,
            text=text,
            tags=tags,
            event_type='config_management.run.{0}'.format(event_type),
            event_object=self._playbook_name,
        )

    def send_metric(self, metric, value, tags=None, host=None):
        """Send the metric `ansible.<metric>` to Datadog, adding the default tags.

        API errors are printed and never raised.
        """
        # Work on a copy: the caller's list must not be modified
        tags = list(tags) if tags else []
        tags.extend(self.default_tags)
        try:
            datadog.api.Metric.send(
                metric="ansible.{0}".format(metric),
                points=value,
                tags=tags,
                host=host,
            )
        except Exception as e:
            # We don't want Ansible to fail on an API error
            print('Couldn\'t send metric "{0}" to Datadog'.format(metric))
            print(e)

    def start_timer(self):
        """Start the timer used to measure the playbook running time."""
        self._start_time = time.time()

    def get_elapsed_time(self):
        """Return the number of seconds elapsed since the timer was started."""
        return time.time() - self._start_time

    @property
    def default_tags(self):
        """Tags sent with every event and metric."""
        return ['playbook:{0}'.format(self._playbook_name)]

    @staticmethod
    def pluralize(number, noun):
        """Return "<number> <noun>", with an "s" appended unless number is 1."""
        if number == 1:
            return "{0} {1}".format(number, noun)
        return "{0} {1}s".format(number, noun)

    @staticmethod
    def format_result(res):
        """Build the event text and module tag for a task result dict.

        Returns a `(event_text, module_name_tag)` tuple. The module name is
        'undefined' unless the result carries an `invocation` entry with a
        `module_name`. A `censored` result is reported using only its
        censored text.
        """
        msg = "$$$\n{0}\n$$$\n".format(res['msg']) if res.get('msg') else ""
        module_name = 'undefined'

        if res.get('censored'):
            event_text = res.get('censored')
        elif not res.get('invocation'):
            event_text = msg
        else:
            invocation = res['invocation']
            module_name = invocation.get('module_name', 'undefined')
            event_text = "$$$\n{0}[{1}]\n$$$\n".format(module_name, invocation.get('module_args', ''))
            event_text += msg
            if 'module_stdout' in res:
                # On Ansible v2, details on internal failures of modules are not reported in the `msg`,
                # so we have to extract the info differently
                event_text += "$$$\n{0}\n{1}\n$$$\n".format(
                    res.get('module_stdout', ''), res.get('module_stderr', ''))

        module_name_tag = 'module:{0}'.format(module_name)
        return event_text, module_name_tag

    ### Ansible callbacks ###

    def runner_on_failed(self, host, res, ignore_errors=False):
        """Send an error event for a failed task, unless errors are ignored."""
        # don't post anything if user asked to ignore errors
        if ignore_errors:
            return

        event_text, module_name_tag = self.format_result(res)
        self.send_task_event(
            'Ansible task failed on "{0}"'.format(host),
            alert_type='error',
            text=event_text,
            tags=[module_name_tag],
            host=host,
        )

    def runner_on_ok(self, host, res):
        """Send a success event, but only when the task changed the host."""
        if not res.get('changed'):
            return

        event_text, module_name_tag = self.format_result(res)
        self.send_task_event(
            'Ansible task changed on "{0}"'.format(host),
            alert_type='success',
            text=event_text,
            tags=[module_name_tag],
            host=host,
        )

    def runner_on_unreachable(self, host, res):
        """Send an error event for an unreachable host."""
        event_text = "\n$$$\n{0}\n$$$\n".format(res)
        self.send_task_event(
            'Ansible failed on unreachable host "{0}"'.format(host),
            alert_type='error',
            text=event_text,
            host=host,
        )

    def _get_inventory_option(self):
        """Return the raw `inventory` command line option, or None if unavailable."""
        options = self._options
        if options is None:
            return None
        if ANSIBLE_ABOVE_28:
            # CLIARGS is accessed like a mapping
            return options.get('inventory')
        return getattr(options, 'inventory', None)

    # Implementation compatible with Ansible v2 only
    def v2_playbook_on_start(self, playbook):
        """Record the playbook, start the timer and derive the playbook and
        inventory names used in events and tags."""
        # On Ansible v2, Ansible doesn't set `self.playbook` automatically
        self.playbook = playbook
        self.start_timer()

        # Set the playbook name from its filename
        self._playbook_name, _ = os.path.splitext(
            os.path.basename(self.playbook._file_name))

        # The inventory option may be missing, a single comma-separated
        # string, or a list/tuple of paths: normalise it to one string.
        inventory = self._get_inventory_option() or ''
        if isinstance(inventory, (list, tuple)):
            inventory = ','.join(inventory)
        self._inventory_name = ','.join(
            os.path.basename(os.path.realpath(name))
            for name in inventory.split(',') if name)

    def v2_playbook_on_play_start(self, play):
        """Configure the Datadog client and send the play "start" event.

        The API key, URL and site come from the config file / environment
        (see `_load_conf`). Without an API key there, `datadog_api_key`,
        `datadog_url` and `datadog_site` are read from the hostvars of
        `localhost`. If no API key is found the plugin disables itself.
        """
        # On Ansible v2, Ansible doesn't set `self.play` automatically
        self.play = play
        if self.disabled:
            return

        # Read config and hostvars
        config_path = os.environ.get(
            'ANSIBLE_DATADOG_CALLBACK_CONF_FILE',
            os.path.join(os.path.dirname(__file__), "datadog_callback.yml"))
        api_key, dd_url, dd_site = self._load_conf(config_path)

        # If there is no api key defined in config file, try to get it from hostvars
        if not api_key:
            hostvars = self.play.get_variable_manager()._hostvars
            if not hostvars:
                print("No api_key found in the config file ({0}) and hostvars aren't set: disabling Datadog callback plugin".format(config_path))
                self.disabled = True
            else:
                try:
                    localhost_vars = hostvars['localhost']
                    api_key = str(localhost_vars['datadog_api_key'])
                    if not dd_url:
                        dd_url = localhost_vars.get('datadog_url')
                    if not dd_site:
                        dd_site = localhost_vars.get('datadog_site')
                except Exception:
                    # Deliberately broad: any failure to read the hostvars
                    # only disables the plugin, it must not fail the run.
                    print('No "api_key" found in the config file ({0}) and "datadog_api_key" is not set in the hostvars: disabling Datadog callback plugin'.format(config_path))
                    self.disabled = True

        if not dd_url:
            if dd_site:
                dd_url = "https://api.{0}".format(dd_site)
            else:
                dd_url = DEFAULT_DD_URL  # default to Datadog US

        # Set up API client and send a start event
        if not self.disabled:
            datadog.initialize(api_key=api_key, api_host=dd_url)
            self.send_playbook_event(
                'Ansible play "{0}" started in playbook "{1}" by "{2}" against "{3}"'.format(
                    self.play.name,
                    self._playbook_name,
                    getpass.getuser(),
                    self._inventory_name),
                event_type='start',
            )

    def playbook_on_stats(self, stats):
        """Send per-host task metrics, the elapsed time and the "end" event."""
        total_tasks = 0
        total_updated = 0
        total_errors = 0
        error_hosts = []

        for host in stats.processed:
            # Aggregations for the event text
            summary = stats.summarize(host)
            total_tasks += summary['ok'] + summary['failures'] + summary['skipped']
            total_updated += summary['changed']
            errors = summary['failures'] + summary['unreachable']
            if errors > 0:
                error_hosts.append((host, summary['failures'], summary['unreachable']))
                total_errors += errors

            # Send metrics for this host
            for metric, value in summary.items():
                self.send_metric('task.{0}'.format(metric), value, host=host)

        # Send playbook elapsed time; sampled once so that the metric and
        # the event title report the same duration
        elapsed_time = self.get_elapsed_time()
        self.send_metric('elapsed_time', elapsed_time)

        # Generate basic "Completed" event
        event_title = 'Ansible playbook "{0}" completed in {1}'.format(
            self._playbook_name,
            self.pluralize(int(elapsed_time), 'second'))
        event_text = 'Ansible updated {0} out of {1} total, on {2}. {3} occurred.'.format(
            self.pluralize(total_updated, 'task'),
            self.pluralize(total_tasks, 'task'),
            self.pluralize(len(stats.processed), 'host'),
            self.pluralize(total_errors, 'error'))
        alert_type = 'success'

        # Add info to event if errors occurred
        if total_errors > 0:
            alert_type = 'error'
            event_title += ' with errors'
            event_text += "\nErrors occurred on the following hosts:\n%%%\n"
            event_text += ''.join(
                "- `{0}` (failure: {1}, unreachable: {2})\n".format(host, failures, unreachable)
                for host, failures, unreachable in error_hosts)
            event_text += "\n%%%\n"
        else:
            event_title += ' successfully'

        self.send_playbook_event(
            event_title,
            alert_type=alert_type,
            text=event_text,
            event_type='end',
        )