From 85dcb957fb6b41f9d8a96057cf768a35d2aa4853 Mon Sep 17 00:00:00 2001 From: schnie Date: Tue, 9 Oct 2018 15:23:58 -0400 Subject: [PATCH 1/2] [AIRFLOW-3177] Change scheduler_heartbeat from gauge to counter This updates the scheduler_heartbeat metric from a gauge to a counter to better support the statsd_exporter for usage with Prometheus. A counter allows users to track the rate of the heartbeat, and integrates with the exporter better. A crashing or down scheduler will no longer emit the metric, but the statsd_exporter will continue to show a 1 for the metric value. This fixes that issue because a counter will continually change, and the lack of change indicates an issue with the scheduler. --- airflow/jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/jobs.py b/airflow/jobs.py index b224f755459ce..3922939a868a7 100644 --- a/airflow/jobs.py +++ b/airflow/jobs.py @@ -1895,7 +1895,7 @@ def process_file(self, file_path, pickle_dags=False, session=None): @provide_session def heartbeat_callback(self, session=None): - Stats.gauge('scheduler_heartbeat', 1, 1) + Stats.incr('scheduler_heartbeat', 1, 1) class BackfillJob(BaseJob): From 26f1d41531c4ef9e64cb6f62488d385aa2eaaed0 Mon Sep 17 00:00:00 2001 From: schnie Date: Tue, 9 Oct 2018 19:16:34 -0400 Subject: [PATCH 2/2] [AIRFLOW-3177] Add statsd change notice in UPDATING.md --- UPDATING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/UPDATING.md b/UPDATING.md index 74337f3fe88de..5e1402576b867 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -52,6 +52,10 @@ To delete a user: airflow users --delete --username jondoe ``` +### StatsD Metrics + +The `scheduler_heartbeat` metric has been changed from a gauge to a counter. Each loop of the scheduler will increment the counter by 1. This provides a higher degree of visibility and allows for better integration with Prometheus using the [StatsD Exporter](https://github.com/prometheus/statsd_exporter). 
Scheduler availability can be determined by graphing and alerting on the rate of this counter. If the scheduler goes down, the rate will drop to 0. + ### Custom auth backends interface change We have updated the version of flask-login we depend upon, and as a result any