# Copyright (c) 2023 The ARA Records Ansible authors
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

import logging
import sys
import time
from collections import defaultdict
from datetime import datetime, timedelta
from urllib.parse import urlencode

from cliff.command import Command

import ara.cli.utils as cli_utils
from ara.cli.base import global_arguments
from ara.clients.utils import get_client

try:
    from prometheus_client import Gauge, Summary, start_http_server

    HAS_PROMETHEUS_CLIENT = True
except ImportError:
    HAS_PROMETHEUS_CLIENT = False

# Where possible and relevant, apply these labels to the metrics so we can write prometheus
# queries to filter and aggregate by these properties
# TODO: make configurable
DEFAULT_PLAYBOOK_LABELS = [
    "ansible_version",
    "client_version",
    "controller",
    "name",
    "path",
    "python_version",
    "server_version",
    "status",
    "updated",
    "user",
]
DEFAULT_TASK_LABELS = ["action", "name", "path", "playbook", "status", "updated"]
DEFAULT_HOST_LABELS = ["name", "playbook", "updated"]


# TODO: This could be made more flexible and live in a library
def get_search_results(client, kind, limit, created_after):
    """
    Returns the results of an API search as a single list, following the
    "next" links of the paginated response until there are none left.

    client: an API client providing get(uri) and an "endpoint" attribute
    kind: string, one of ["playbooks", "hosts", "tasks"]
    limit: int, the number of items to return per page
    created_after: string, a date formatted as such: 2020-01-31T15:45:36.737000Z
                   (or None to search without a lower bound)
    """
    params = [("order", "-id"), ("limit", limit)]
    if created_after is not None:
        params.append(("created_after", created_after))
    # Escape the values so characters such as "+" (timezone offsets) survive the trip.
    # ":" is left untouched so that usual timestamps yield the same query string as before.
    query = f"/api/v1/{kind}?{urlencode(params, safe=':')}"

    response = client.get(query)
    # Copy so we don't modify the list that belongs to the first response
    items = list(response["results"])

    # Iterate through multiple pages of results if necessary
    while response["next"]:
        # For example:
        # "next": "https://demo.recordsansible.org/api/v1/playbooks?limit=1000&offset=2000",
        uri = response["next"]
        # Only strip the endpoint when it is the prefix of the link, not wherever it occurs
        if uri.startswith(client.endpoint):
            uri = uri[len(client.endpoint):]
        response = client.get(uri)
        items.extend(response["results"])

    return items
class AraPlaybookCollector(object):
    """
    Collects playbooks from the ara API and publishes them as prometheus metrics:

    - ara_playbooks_range: set once to the limit given to the collector
    - ara_playbooks_total: incremented for every playbook collected
    - ara_playbooks: summary of playbook durations (in seconds), labelled with playbook fields
    """

    def __init__(self, client, log, limit, labels=DEFAULT_PLAYBOOK_LABELS):
        """
        client: API client handed over to get_search_results
        log: logger used to report progress
        limit: int, the number of playbooks to request per page
        labels: names of the playbook fields to attach as labels to the summary
        """
        self.client = client
        self.log = log
        self.limit = limit
        # Keep our own copy so the shared module-level default is never tied to this instance
        self.labels = list(labels)

        self.metrics = {
            "range": Gauge("ara_playbooks_range", "Limit metric collection to the N most recent playbooks"),
            "total": Gauge("ara_playbooks_total", "Total number of playbooks recorded by ara"),
            "playbooks": Summary(
                "ara_playbooks", "Labels and duration (in seconds) of playbooks recorded by ara", self.labels
            ),
        }
        self.metrics["range"].set(self.limit)

    def _duration_in_seconds(self, playbook):
        """Returns the duration of a playbook in seconds, or 0 when it is missing or can't be parsed."""
        duration = playbook["duration"]
        if duration is None:
            return 0

        # The API returns a duration in string format, convert it back to seconds
        # so we can use it as a value for the metric.
        # TODO: parse_timedelta throws an exception for playbooks that last longer than a day
        # That was meant to be fixed in https://github.com/ansible-community/ara/commit/db8243c3af938ece12c9cd59dd7fe4d9a711b76d
        try:
            return cli_utils.parse_timedelta(duration)
        except ValueError:
            # Still record the playbook, but leave a trace instead of silently reporting 0 seconds
            self.log.warning(
                "unable to parse duration %r of playbook %s, recording 0 seconds", duration, playbook.get("id")
            )
            return 0

    def collect_metrics(self, created_after=None):
        """
        Fetches the playbooks created after the provided timestamp and records them.

        Returns the timestamp to provide on the next call: the "created" of the first
        playbook returned passed through cli_utils.increment_timestamp, or created_after
        unchanged when no playbooks were returned.
        """
        playbooks = get_search_results(self.client, "playbooks", self.limit, created_after)
        if not playbooks:
            return created_after

        # Save the most recent timestamp so we only scrape beyond it next time
        created_after = cli_utils.increment_timestamp(playbooks[0]["created"])
        self.log.info(f"updating metrics for {len(playbooks)} playbooks...")

        for playbook in playbooks:
            seconds = self._duration_in_seconds(playbook)

            # Gather the values of each label so we can attach them to our metrics
            labels = {label: playbook[label] for label in self.labels}

            self.metrics["playbooks"].labels(**labels).observe(seconds)
            self.metrics["total"].inc()

        return created_after
class AraTaskCollector(object):
    """
    Collects tasks from the ara API and publishes them as prometheus metrics:

    - ara_tasks_range: set once to the limit given to the collector
    - ara_tasks_total: incremented for every task collected
    - ara_tasks: summary of task durations (in seconds), labelled with task fields
    """

    def __init__(self, client, log, limit, labels=DEFAULT_TASK_LABELS):
        """
        client: API client handed over to get_search_results
        log: logger used to report progress
        limit: int, the number of tasks to request per page
        labels: names of the task fields to attach as labels to the summary
        """
        self.client = client
        self.log = log
        self.limit = limit
        # Keep our own copy so the shared module-level default is never tied to this instance
        self.labels = list(labels)

        self.metrics = {
            "range": Gauge("ara_tasks_range", "Limit metric collection to the N most recent tasks"),
            "total": Gauge("ara_tasks_total", "Number of tasks recorded by ara in prometheus"),
            "tasks": Summary(
                "ara_tasks", "Labels and duration, in seconds, of playbook tasks recorded by ara", self.labels
            ),
        }
        self.metrics["range"].set(self.limit)

    def _duration_in_seconds(self, task):
        """Returns the duration of a task in seconds, or 0 when it is missing or can't be parsed."""
        duration = task["duration"]
        if duration is None:
            return 0

        # The API returns a duration in string format, convert it back to seconds
        # so we can use it as a value for the metric.
        # TODO: parse_timedelta throws an exception for tasks that last longer than a day
        # That was meant to be fixed in https://github.com/ansible-community/ara/commit/db8243c3af938ece12c9cd59dd7fe4d9a711b76d
        try:
            return cli_utils.parse_timedelta(duration)
        except ValueError:
            # Still record the task, but leave a trace instead of silently reporting 0 seconds
            self.log.warning("unable to parse duration %r of task %s, recording 0 seconds", duration, task.get("id"))
            return 0

    def collect_metrics(self, created_after=None):
        """
        Fetches the tasks created after the provided timestamp and records them.

        Returns the timestamp to provide on the next call: the "created" of the first
        task returned passed through cli_utils.increment_timestamp, or created_after
        unchanged when no tasks were returned.
        """
        tasks = get_search_results(self.client, "tasks", self.limit, created_after)
        if not tasks:
            return created_after

        # Save the most recent timestamp so we only scrape beyond it next time
        created_after = cli_utils.increment_timestamp(tasks[0]["created"])
        self.log.info(f"updating metrics for {len(tasks)} tasks...")

        for task in tasks:
            seconds = self._duration_in_seconds(task)

            # Gather the values of each label so we can attach them to our metrics
            labels = {label: task[label] for label in self.labels}

            self.metrics["tasks"].labels(**labels).observe(seconds)
            self.metrics["total"].inc()

        return created_after
class AraHostCollector(object):
    """
    Collects hosts from the ara API and publishes their results as prometheus gauges:
    one labelled gauge per status (changed, failed, ok, skipped, unreachable) as well
    as unlabelled "range" and "total" gauges.
    """

    def __init__(self, client, log, limit, labels=DEFAULT_HOST_LABELS):
        self.client = client
        self.log = log
        self.limit = limit
        self.labels = labels

        # (key, metric name, description, carries the host labels), in registration order
        definitions = (
            ("changed", "ara_hosts_changed", "Number of changes on a host", True),
            ("failed", "ara_hosts_failed", "Number of failures on a host", True),
            ("ok", "ara_hosts_ok", "Number of successful tasks without changes on a host", True),
            ("range", "ara_hosts_range", "Limit metric collection to the N most recent hosts", False),
            ("skipped", "ara_hosts_skipped", "Number of skipped tasks on a host", True),
            ("total", "ara_hosts_total", "Hosts recorded by ara", False),
            ("unreachable", "ara_hosts_unreachable", "Number of unreachable errors on a host", True),
        )
        self.metrics = {}
        for key, name, description, labelled in definitions:
            if labelled:
                self.metrics[key] = Gauge(name, description, labels)
            else:
                self.metrics[key] = Gauge(name, description)
        self.metrics["range"].set(self.limit)

    def collect_metrics(self, created_after=None):
        """
        Fetches the hosts created after the provided timestamp and records their results.

        Returns the timestamp to provide on the next call, or created_after unchanged
        when no hosts were returned.
        """
        hosts = get_search_results(self.client, "hosts", self.limit, created_after)
        if not hosts:
            return created_after

        # Remember the timestamp of the first host returned so the next scrape starts beyond it
        next_created_after = cli_utils.increment_timestamp(hosts[0]["created"])
        self.log.info(f"updating metrics for {len(hosts)} hosts...")

        statuses = ("changed", "failed", "ok", "skipped", "unreachable")
        for host in hosts:
            self.metrics["total"].inc()

            # Values of the labels to attach to each of the gauges for this host
            label_values = {label: host[label] for label in self.labels}

            # The results are integers so they can be used as-is for the value of the gauge.
            # Empty (zero) results are not published.
            for status in statuses:
                count = host[status]
                if count:
                    self.metrics[status].labels(**label_values).set(count)

        return next_created_after
class PrometheusExporter(Command):
    """Exposes a prometheus exporter to provide metrics from an instance of ara"""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Adds the global arguments and the integer options of the exporter to the parser."""
        parser = global_arguments(super().get_parser(prog_name))

        # (flag, default, help) of the options, every one of them is an integer
        integer_options = (
            ("--playbook-limit", 1000, "Max number of playbooks to request at once (default: 1000)"),
            ("--task-limit", 2500, "Max number of tasks to request at once (default: 2500)"),
            ("--host-limit", 2500, "Max number of hosts to request at once (default: 2500)"),
            ("--poll-frequency", 60, "Seconds to wait until querying ara for new metrics (default: 60)"),
            ("--prometheus-port", 8001, "Port on which the prometheus exporter will listen (default: 8001)"),
            ("--max-days", 90, "Maximum number of days to backfill metrics for (default: 90)"),
        )
        for flag, default, description in integer_options:
            parser.add_argument(flag, help=description, default=default, type=int)
        return parser

    def take_action(self, args):
        """
        Starts the prometheus http server then collects metrics in an endless loop,
        sleeping for the poll frequency between each round.
        """
        if not HAS_PROMETHEUS_CLIENT:
            self.log.error("The prometheus_client python package must be installed to run this command")
            sys.exit(2)

        # A CA bundle takes precedence, otherwise verify unless --insecure was given
        verify = args.ssl_ca if args.ssl_ca else not args.insecure
        client = get_client(
            client=args.client,
            endpoint=args.server,
            timeout=args.timeout,
            username=args.username,
            password=args.password,
            cert=args.ssl_cert,
            key=args.ssl_key,
            verify=verify,
            run_sql_migrations=False,
        )

        # One collector per kind of metrics, queried in this order on every round
        collectors = {
            "playbooks": AraPlaybookCollector(client=client, log=self.log, limit=args.playbook_limit),
            "hosts": AraHostCollector(client=client, log=self.log, limit=args.host_limit),
            "tasks": AraTaskCollector(client=client, log=self.log, limit=args.task_limit),
        }

        start_http_server(args.prometheus_port)
        self.log.info(f"ara prometheus exporter listening on http://0.0.0.0:{args.prometheus_port}/metrics")

        backfill_from = (datetime.now() - timedelta(days=args.max_days)).isoformat()
        self.log.info(
            f"Backfilling metrics for the last {args.max_days} days since {backfill_from}... This can take a while."
        )

        # Every collector starts from the backfill timestamp and then tracks its own progress
        latest = dict.fromkeys(collectors, backfill_from)
        while True:
            for kind, collector in collectors.items():
                latest[kind] = collector.collect_metrics(latest[kind])

            time.sleep(args.poll_frequency)
            self.log.info("Checking for updated metrics...")
def increment_timestamp(timestamp, pattern="%Y-%m-%dT%H:%M:%S.%fZ"):
    """
    API timestamps have this python isoformat: 2022-12-08T05:45:38.465607Z
    We want to increment timestamps by one microsecond so we can search for things created after them.

    timestamp: string, the timestamp to increment
    pattern: string, the strptime format of the timestamp
    Returns the incremented timestamp as a string in isoformat (without the trailing "Z").
    Raises ValueError if the timestamp doesn't match the pattern.
    """
    try:
        parsed = datetime.strptime(timestamp, pattern)
    except ValueError:
        # Python's isoformat leaves out the fractional part when there are no microseconds
        # (ex: 2022-12-08T05:45:38Z), try again without it before giving up.
        if ".%f" not in pattern:
            raise
        parsed = datetime.strptime(timestamp, pattern.replace(".%f", ""))

    return (parsed + timedelta(microseconds=1)).isoformat()
+ """ + return (datetime.strptime(timestamp, pattern) + timedelta(microseconds=1)).isoformat() + + # Also see: ui.templatetags.truncatepath def truncatepath(path, count): """ diff --git a/contrib/grafana/ara-dashboard.json b/contrib/grafana/ara-dashboard.json new file mode 100644 index 00000000..56f1bc1f --- /dev/null +++ b/contrib/grafana/ara-dashboard.json @@ -0,0 +1,1843 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.5.3" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 13, + "title": "Playbooks", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (status) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbook results by status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, 
+ "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg(ara_playbooks_sum) by (path)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average playbook duration by path (in seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + 
}, + "editorMode": "code", + "expr": "sum by (user) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbooks by user", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (controller) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbooks by controller", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + 
"custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (ansible_version) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbooks by version of Ansible", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (python_version) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbooks by version of Python", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 18, 
+ "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (client_version) (ara_playbooks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Playbooks by version of ara client", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 7, + "panels": [], + "title": "Tasks", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"editorMode": "code", + "expr": "sum by (status) (ara_tasks_count)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task results by status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 8, + "x": 0, + "y": 46 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg(ara_tasks_sum) by (name)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average task duration by name (in seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 8, + "x": 8, + "y": 46 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg(ara_tasks_sum) by (action)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average task duration by action (in seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 2, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 3, + "scaleDistribution": { + "log": 2, + 
"type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 8, + "x": 16, + "y": 46 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg(ara_tasks_sum) by (path)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Average task duration by path (in seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 8, + 
"x": 0, + "y": 61 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (name) (ara_tasks_count{status=\"failed\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task failures by name", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 8, + "x": 8, + "y": 61 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum 
by (action) (ara_tasks_count{status=\"failed\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task failures by action", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 8, + "x": 16, + "y": 61 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (path) (ara_tasks_count{status=\"failed\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task failures by path", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 74 + }, + "id": 6, + "panels": [], + "title": "Hosts", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 75 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(ara_hosts_ok)", + "legendFormat": "ok", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(ara_hosts_failed)", + "hide": false, + "legendFormat": "failed", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(ara_hosts_changed)", + "hide": false, + "legendFormat": "changed", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(ara_hosts_skipped)", + "hide": false, + "legendFormat": "skipped", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum (ara_hosts_unreachable)", + "hide": false, + "legendFormat": "unreachable", + "range": true, + "refId": "E" + } + ], + "title": "Host results by status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 87 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (name) (ara_hosts_changed)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Host changed results by name", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, 
+ "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 4, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 100 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (name) (ara_hosts_failed)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Host failed results by name", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Ansible metrics (by ara)", + "uid": "e0717f1a-4bb5-4373-b177-a9f5a498962d", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/doc/requirements.txt b/doc/requirements.txt index fc5d33de..d40f396f 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -16,3 +16,5 @@ dynaconf[yaml] tzlocal whitenoise pygments +# for the "ara prometheus" exporter CLI command +prometheus_client \ 
No newline at end of file diff --git a/doc/source/_static/grafana-hosts.png b/doc/source/_static/grafana-hosts.png new file mode 100644 index 00000000..fb22d7f6 Binary files /dev/null and b/doc/source/_static/grafana-hosts.png differ diff --git a/doc/source/_static/grafana-playbooks.png b/doc/source/_static/grafana-playbooks.png new file mode 100644 index 00000000..211f27c6 Binary files /dev/null and b/doc/source/_static/grafana-playbooks.png differ diff --git a/doc/source/_static/grafana-tasks.png b/doc/source/_static/grafana-tasks.png new file mode 100644 index 00000000..00921f91 Binary files /dev/null and b/doc/source/_static/grafana-tasks.png differ diff --git a/doc/source/cli.rst b/doc/source/cli.rst index 0ad2a3b2..4eb0582b 100644 --- a/doc/source/cli.rst +++ b/doc/source/cli.rst @@ -474,6 +474,26 @@ Examples: # Aggregate metrics by task file rather than action ara task metrics --aggregate path +ara prometheus +-------------- + +.. command-output:: ara prometheus --help + +Also read: `documentation on prometheus <https://ara.readthedocs.io/en/latest/prometheus.html>`_. + +Examples: + +.. code-block:: bash + + # Start a prometheus exporter on the default address (http://0.0.0.0:8001/metrics) + # Then, backfill metrics from the last 365 days until now + # Then, poll every 30s for new metrics + ara prometheus --max-days 365 --poll 30 + + # When gathering metrics, customize the number of items returned in each page of results + # from the API based on instance size and performance expectations + ara prometheus --playbook-limit 500 --task-limit 1000 --host-limit 1000 + CLI: ara-manage (django API server) =================================== diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 2184aace..f9b43ed9 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -1,3 +1,5 @@ +.. 
_contributing: + +Contributing to ARA +=================== + diff --git a/doc/source/index.rst b/doc/source/index.rst index e943d49e..f50b5b54 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -20,5 +20,6 @@ Table of Contents CLI: ara and ara-manage Contributing to ARA Frequently asked questions + Ansible metrics with Prometheus <prometheus> Troubleshooting Changelog and release notes diff --git a/doc/source/prometheus.rst b/doc/source/prometheus.rst new file mode 100644 index 00000000..bdbbf6d7 --- /dev/null +++ b/doc/source/prometheus.rst @@ -0,0 +1,117 @@ +Ansible metrics with Prometheus +=============================== + +ara doesn't provide monitoring or alerting out of the box (they are out of scope) but it records a number of granular metrics about Ansible playbooks, tasks and hosts, amongst other things. + +Starting with version 1.6.2, ara provides an integration of `prometheus_client <https://github.com/prometheus/client_python>`_ that queries the ara API and then exposes these metrics for prometheus to scrape. + +Once these metrics are in `Prometheus <https://prometheus.io>`_, they're available for queries, alerts and pretty graphs in `Grafana <https://grafana.com>`_. + +The source code for the exporter can be found in the `git repository <https://github.com/ansible-community/ara/blob/master/ara/cli/prometheus.py>`_. + +Getting started +--------------- + +The ara prometheus exporter implementation is not an API server component and does not require an API server to run by default. + +It can run out of an installation from distribution packages, PyPI packages in a virtual environment or from a container. + +For example: + +.. 
code-block:: bash + + # Install ara, ansible and prometheus_client in a virtual environment + python3 -m venv ~/venv/ara + source ~/venv/ara/bin/activate + pip install ansible ara[server,prometheus] + + # Run and record a playbook + export ANSIBLE_CALLBACK_PLUGINS=$(python3 -m ara.setup.callback_plugins) + ansible-playbook ~/docs.yml + + # Start the prometheus exporter + ara prometheus + # http://127.0.0.1:8001/metrics available with metrics from running ~/docs.yml + +When running an ara API server, the prometheus exporter can be installed and run from wherever it is able to query the API. + +For example: + +.. code-block:: bash + + # Install ara and prometheus_client in a virtual environment + # (ansible and the ara API server dependencies are not required here) + python3 -m venv ~/venv/ara + source ~/venv/ara/bin/activate + pip install ara[prometheus] + + # Configure the prometheus exporter to query a running instance + export ARA_API_CLIENT=http + export ARA_API_SERVER=http://ara.example.org + + # Start the prometheus exporter, backfilling metrics from the last 30 days + # and then polling for new data every 30 seconds + ara prometheus --max-days 30 --poll 30 + # http://127.0.0.1:8001/metrics available with metrics from ara.example.org + +Once the exporter is running, Prometheus must be configured to scrape it: + +.. code-block:: yaml + + global: + scrape_interval: 30s + + scrape_configs: + - job_name: 'ara' + static_configs: + # Replace with wherever the exporter is available and listening relative to prometheus + - targets: ['10.0.0.10:8001'] + +Metrics should then be available as soon as Prometheus successfully scrapes at least once. + +Available metrics +----------------- + +The ``ara prometheus`` command queries the ara API and then makes the following metrics available: + +- **ara_playbooks** (`Summary <https://prometheus.io/docs/concepts/metric_types/#summary>`_) provides labels based on playbook properties and duration in seconds + +.. 
code-block:: + + ara_playbooks_count{ansible_version="2.15.0",client_version="1.6.2.dev10",controller="fedora",name="docs",path="/home/user/docs.yml",python_version="3.11.3",server_version="1.6.2.dev10",status="completed",updated="2023-06-08T02:43:29.910977Z",user="ansible"} 1.0 + ara_playbooks_sum{ansible_version="2.15.0",client_version="1.6.2.dev10",controller="fedora",name="docs",path="/home/user/docs.yml",python_version="3.11.3",server_version="1.6.2.dev10",status="completed",updated="2023-06-08T02:43:29.910977Z",user="ansible"} 14.161331 + +- **ara_tasks** (`Summary <https://prometheus.io/docs/concepts/metric_types/#summary>`_) provides labels based on task properties and duration in seconds + +.. code-block:: + + ara_tasks_count{action="command",name="docs",path="/home/user/docs.yml",playbook="30",status="completed",updated="2023-06-08T02:43:29.665787Z"} 1.0 + ara_tasks_sum{action="command",name="docs",path="/home/user/docs.yml",playbook="30",status="completed",updated="2023-06-08T02:43:29.665787Z"} 0.29482 + +- **ara_hosts_{ok,failed,changed,skipped,unreachable}** (`Gauge <https://prometheus.io/docs/concepts/metric_types/#gauge>`_) provide labels based on host properties and number of results for each status + +.. code-block:: + + ara_hosts_ok{name="localhost",playbook="30",updated="2023-06-08T02:43:29.848077Z"} 36.0 + ara_hosts_failed{name="localhost",playbook="24",updated="2023-06-08T02:32:18.773096Z"} 1.0 + ara_hosts_changed{name="localhost",playbook="30",updated="2023-06-08T02:43:29.848077Z"} 10.0 + ara_hosts_skipped{name="host3",playbook="15",updated="2023-06-08T01:24:59.210984Z"} 2.0 + ara_hosts_unreachable{name="localhost",playbook="24",updated="2023-06-08T02:32:18.773096Z"} 1.0 + +Grafana dashboard +----------------- + +While everyone is encouraged to create and tweak their own dashboards according to their needs and preferences, the community maintains a Grafana dashboard to help users get started. + +It is in `contrib <https://github.com/ansible-community/ara/tree/master/contrib>`_ inside the git repository. +Feel free to :ref:`open a pull request <contributing>` if you'd like to contribute! 
+ +The dashboard contains a high-level overview of available metrics for playbooks, tasks and hosts. + +Open these screenshots in a new tab to view them at full resolution: + +.. image:: ../source/_static/grafana-playbooks.png + +.. image:: ../source/_static/grafana-tasks.png + +.. image:: ../source/_static/grafana-hosts.png diff --git a/setup.cfg b/setup.cfg index 9548d2c3..9410427e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ console_scripts = ara.cli = expire = ara.cli.expire:ExpireObjects + prometheus = ara.cli.prometheus:PrometheusExporter playbook list = ara.cli.playbook:PlaybookList playbook show = ara.cli.playbook:PlaybookShow playbook delete = ara.cli.playbook:PlaybookDelete @@ -80,6 +81,8 @@ postgresql= psycopg2 mysql= mysqlclient +prometheus= + prometheus_client [build_sphinx] source-dir = doc/source