From dae601318bb316129323bed5f6b4eb90b1a83f6c Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 09:26:53 +0300
Subject: [PATCH 01/58] initial work on e-mail report for failed queries
---
redash/tasks/queries.py | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 6002ccd275..95137760dd 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -1,6 +1,7 @@
import logging
import signal
import time
+import datetime
import redis
from celery.exceptions import SoftTimeLimitExceeded, TimeLimitExceeded
@@ -11,7 +12,8 @@
from redash import models, redis_connection, settings, statsd_client
from redash.query_runner import InterruptException
from redash.tasks.alerts import check_alerts_for_query
-from redash.utils import gen_query_hash, json_dumps, utcnow, mustache_render
+from redash.tasks.general import send_mail
+from redash.utils import gen_query_hash, json_dumps, json_loads, utcnow, mustache_render
from redash.worker import celery
logger = get_task_logger(__name__)
@@ -385,10 +387,41 @@ def _load_data_source(self):
logger.info("task=execute_query state=load_ds ds_id=%d", self.data_source_id)
return models.DataSource.query.get(self.data_source_id)
+@celery.task(name="redash.tasks.send_aggregated_errors")
+def send_aggregated_errors(email_address):
+ key = 'aggregated_failures:{}'.format(email_address)
+ errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
+
+ text = "We're sorry, but these queries failed over the past hour:\n" + \
+ '\n'.join(['\nQuery: {}\nFailed at: {}\nFailure reason: {}'.format(e['query'], e['failed_at'], e['message']) for e in errors])
+ send_mail.delay([email_address], "Failed Queries", None, text)
+
+ redis_connection.delete(key)
+
+def notify_of_failure(self, exc, _, args, __, ___):
+ if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
+ return
+
+ query, _, metadata = args[0:3]
+ email_address = metadata.get('Username')
+
+ if email_address:
+ key = 'aggregated_failures:{}'.format(email_address)
+ redis_connection.lpush(key, json_dumps({
+ 'query': query,
+ 'message': exc.message,
+ 'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
+ }))
+
+ if not redis_connection.exists('{}:pending'.format(key)):
+ delay = 10
+ send_aggregated_errors.apply_async(args=(email_address,), countdown=delay)
+ redis_connection.set('{}:pending'.format(key), 1)
+ redis_connection.expire('{}:pending'.format(key), delay)
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
-@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True)
+@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True, on_failure=notify_of_failure)
def execute_query(self, query, data_source_id, metadata, user_id=None,
scheduled_query_id=None, is_api_key=False):
if scheduled_query_id is not None:
From ca7e3a28cbfe309b099281c7395b78251e5f8659 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 09:58:42 +0300
Subject: [PATCH 02/58] send failure report only for scheduled queries and not
for adhoc queries
---
redash/tasks/queries.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 95137760dd..0049bc7de3 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -399,16 +399,17 @@ def send_aggregated_errors(email_address):
redis_connection.delete(key)
def notify_of_failure(self, exc, _, args, __, ___):
- if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
+ scheduled_query_id = args[-2]
+ if scheduled_query_id is None or not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
return
- query, _, metadata = args[0:3]
- email_address = metadata.get('Username')
+ query = models.Query.query.get(scheduled_query_id)
+ email_address = query.user.email
if email_address:
key = 'aggregated_failures:{}'.format(email_address)
redis_connection.lpush(key, json_dumps({
- 'query': query,
+ 'query': query.query_text,
'message': exc.message,
'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
}))
From b388c6f6a22741ab889610587bd5f1bfa9403134 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 10:00:37 +0300
Subject: [PATCH 03/58] add setting to determine if to send failure reports
---
redash/settings/__init__.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
index 864d04385a..ddfd938397 100644
--- a/redash/settings/__init__.py
+++ b/redash/settings/__init__.py
@@ -212,6 +212,8 @@ def email_server_is_configured():
HOST = os.environ.get('REDASH_HOST', '')
+SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES = parse_boolean(os.environ.get('REDASH_SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES', 'false'))
+
ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE = os.environ.get('REDASH_ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE', "({state}) {alert_name}")
# How many requests are allowed per IP to the login page before
From e910fd3e6d782c5cb38b489063eb9ae4c1bf742f Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 10:06:44 +0300
Subject: [PATCH 04/58] add setting to determine interval of aggregated e-mail
report
---
redash/settings/__init__.py | 1 +
redash/tasks/queries.py | 7 +++----
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
index ddfd938397..b155956880 100644
--- a/redash/settings/__init__.py
+++ b/redash/settings/__init__.py
@@ -213,6 +213,7 @@ def email_server_is_configured():
HOST = os.environ.get('REDASH_HOST', '')
SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES = parse_boolean(os.environ.get('REDASH_SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES', 'false'))
+SEND_FAILURE_EMAIL_INTERVAL = int(os.environ.get('REDASH_SEND_FAILURE_EMAIL_INTERVAL', 3600))
ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE = os.environ.get('REDASH_ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE', "({state}) {alert_name}")
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 0049bc7de3..f7fc08f25c 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -392,7 +392,7 @@ def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
- text = "We're sorry, but these queries failed over the past hour:\n" + \
+ text = "We're sorry, but these queries failed lately:\n" + \
'\n'.join(['\nQuery: {}\nFailed at: {}\nFailure reason: {}'.format(e['query'], e['failed_at'], e['message']) for e in errors])
send_mail.delay([email_address], "Failed Queries", None, text)
@@ -415,10 +415,9 @@ def notify_of_failure(self, exc, _, args, __, ___):
}))
if not redis_connection.exists('{}:pending'.format(key)):
- delay = 10
- send_aggregated_errors.apply_async(args=(email_address,), countdown=delay)
+ send_aggregated_errors.apply_async(args=(email_address,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
redis_connection.set('{}:pending'.format(key), 1)
- redis_connection.expire('{}:pending'.format(key), delay)
+ redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
From e6955497a48260f66056906c3040268534370e39 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 11:26:52 +0300
Subject: [PATCH 05/58] html templating of scheduled query failure report
---
redash/tasks/queries.py | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index f7fc08f25c..60ef60374d 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -392,11 +392,10 @@ def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
- text = "We're sorry, but these queries failed lately:\n" + \
- '\n'.join(['\nQuery: {}\nFailed at: {}\nFailure reason: {}'.format(e['query'], e['failed_at'], e['message']) for e in errors])
- send_mail.delay([email_address], "Failed Queries", None, text)
-
- redis_connection.delete(key)
+ html = "We're sorry, but these queries failed lately:
- {}
".format(
+ ''.join(['Failed at: {}
Failure reason: {}
Query: {}'.format(e['failed_at'], e['message'], e['query']) for e in errors])
+ )
+ send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None) redis_connection.delete(key)
def notify_of_failure(self, exc, _, args, __, ___):
scheduled_query_id = args[-2]
From 13e9bb11ec2dc1c9f18e675e5430d627f8c5ea7c Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 11:35:53 +0300
Subject: [PATCH 06/58] break line
---
redash/tasks/queries.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 60ef60374d..7712628df0 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -395,7 +395,9 @@ def send_aggregated_errors(email_address):
html = "We're sorry, but these queries failed lately:
- {}
".format(
''.join(['Failed at: {}
Failure reason: {}
Query: {}'.format(e['failed_at'], e['message'], e['query']) for e in errors])
)
- send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None) redis_connection.delete(key)
+ send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
+
+ redis_connection.delete(key)
def notify_of_failure(self, exc, _, args, __, ___):
scheduled_query_id = args[-2]
From 95be1a98910e0ff56db5e477d0551a71c0ad8038 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 12:34:58 +0300
Subject: [PATCH 07/58] support timeouts for failure reports
---
redash/tasks/queries.py | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 7712628df0..a94ca63fd4 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -17,7 +17,7 @@
from redash.worker import celery
logger = get_task_logger(__name__)
-
+TIMEOUT_MESSAGE = "Query exceeded Redash query execution time limit."
def _job_lock_id(query_hash, data_source_id):
return "query_hash_job:%s:%s" % (data_source_id, query_hash)
@@ -58,7 +58,7 @@ def to_dict(self):
status = self.STATUSES[task_status]
if isinstance(result, (TimeLimitExceeded, SoftTimeLimitExceeded)):
- error = "Query exceeded Redash query execution time limit."
+ error = TIMEOUT_MESSAGE
status = 4
elif isinstance(result, Exception):
error = result.message
@@ -144,7 +144,7 @@ def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query
result = execute_query.apply_async(args=args,
argsrepr=argsrepr,
queue=queue_name,
- time_limit=time_limit)
+ soft_time_limit=time_limit)
job = QueryTask(async_result=result)
logging.info("[%s] Created new job: %s", query_hash, job.id)
@@ -323,6 +323,11 @@ def run(self):
try:
data, error = query_runner.run_query(annotated_query, self.user)
+ except SoftTimeLimitExceeded as error:
+ if self.scheduled_query:
+ notify_of_failure(TIMEOUT_MESSAGE, self.scheduled_query.id)
+
+ raise error
except Exception as e:
error = text_type(e)
data = None
@@ -399,8 +404,10 @@ def send_aggregated_errors(email_address):
redis_connection.delete(key)
-def notify_of_failure(self, exc, _, args, __, ___):
- scheduled_query_id = args[-2]
+def on_failure(self, exc, _, args, __, ___):
+ notify_of_failure(exc.message, args[-2])
+
+def notify_of_failure(message, scheduled_query_id):
if scheduled_query_id is None or not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
return
@@ -411,7 +418,7 @@ def notify_of_failure(self, exc, _, args, __, ___):
key = 'aggregated_failures:{}'.format(email_address)
redis_connection.lpush(key, json_dumps({
'query': query.query_text,
- 'message': exc.message,
+ 'message': message,
'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
}))
@@ -422,12 +429,13 @@ def notify_of_failure(self, exc, _, args, __, ___):
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
-@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True, on_failure=notify_of_failure)
+@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True, on_failure=on_failure)
def execute_query(self, query, data_source_id, metadata, user_id=None,
scheduled_query_id=None, is_api_key=False):
if scheduled_query_id is not None:
scheduled_query = models.Query.query.get(scheduled_query_id)
else:
scheduled_query = None
+
return QueryExecutor(self, query, data_source_id, user_id, is_api_key, metadata,
scheduled_query).run()
From ba299cd25811c396556ea4457e6212824b17a7d0 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 14 May 2019 15:31:05 +0300
Subject: [PATCH 08/58] aggregate errors within message and warn if approaching
threshold
---
redash/settings/__init__.py | 1 +
redash/tasks/queries.py | 29 +++++++++++++++++++++++------
2 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
index b155956880..e3fcbc4a69 100644
--- a/redash/settings/__init__.py
+++ b/redash/settings/__init__.py
@@ -214,6 +214,7 @@ def email_server_is_configured():
SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES = parse_boolean(os.environ.get('REDASH_SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES', 'false'))
SEND_FAILURE_EMAIL_INTERVAL = int(os.environ.get('REDASH_SEND_FAILURE_EMAIL_INTERVAL', 3600))
+MAX_FAILURE_REPORTS_PER_QUERY = int(os.environ.get('REDASH_MAX_FAILURE_REPORTS_PER_QUERY', 5))
ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE = os.environ.get('REDASH_ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE', "({state}) {alert_name}")
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index a94ca63fd4..ce7cdf56c4 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -2,6 +2,7 @@
import signal
import time
import datetime
+from collections import Counter
import redis
from celery.exceptions import SoftTimeLimitExceeded, TimeLimitExceeded
@@ -323,12 +324,10 @@ def run(self):
try:
data, error = query_runner.run_query(annotated_query, self.user)
- except SoftTimeLimitExceeded as error:
- if self.scheduled_query:
+ except Exception as e:
+ if isinstance(e, SoftTimeLimitExceeded) and self.scheduled_query:
notify_of_failure(TIMEOUT_MESSAGE, self.scheduled_query.id)
- raise error
- except Exception as e:
error = text_type(e)
data = None
logging.warning('Unexpected error while running query:', exc_info=1)
@@ -396,9 +395,21 @@ def _load_data_source(self):
def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
+ occurrences = Counter((e.get('query'), e.get('message')) for e in errors)
+ unique_errors = {(e.get('query'), e.get('message')): e for e in errors}
html = "We're sorry, but these queries failed lately:
- {}
".format(
- ''.join(['Failed at: {}
Failure reason: {}
Query: {}'.format(e['failed_at'], e['message'], e['query']) for e in errors])
+ ''.join(["""
+ Failed at: {failed_at}
+ Failure reason: {failure_reason}
+ Failure count: {failure_count}
+ Query: {query}
+ {comment}""".format(
+ failed_at=v.get('failed_at'),
+ failure_reason=v.get('message'),
+ failure_count=occurrences[k],
+ comment=v.get('comment'),
+ query=v.get('query')) for k, v in unique_errors.iteritems()])
)
send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
@@ -413,12 +424,18 @@ def notify_of_failure(message, scheduled_query_id):
query = models.Query.query.get(scheduled_query_id)
email_address = query.user.email
+ schedule_failures = query.schedule_failures + 1
- if email_address:
+ if email_address and schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
key = 'aggregated_failures:{}'.format(email_address)
+ reporting_will_soon_stop = schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
+ comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
+
redis_connection.lpush(key, json_dumps({
+ 'id': query.id,
'query': query.query_text,
'message': message,
+ 'comment': comment,
'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
}))
From 072219545a7383583c3ad12ad8ae33641577fb1c Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 10:19:08 +0300
Subject: [PATCH 09/58] handle errors in QueryExecutor.run instead of
on_failure
---
redash/tasks/queries.py | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index ce7cdf56c4..4e05020d78 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -325,9 +325,6 @@ def run(self):
try:
data, error = query_runner.run_query(annotated_query, self.user)
except Exception as e:
- if isinstance(e, SoftTimeLimitExceeded) and self.scheduled_query:
- notify_of_failure(TIMEOUT_MESSAGE, self.scheduled_query.id)
-
error = text_type(e)
data = None
logging.warning('Unexpected error while running query:', exc_info=1)
@@ -344,6 +341,7 @@ def run(self):
self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
self.scheduled_query.schedule_failures += 1
models.db.session.add(self.scheduled_query)
+ notify_of_failure(error, self.scheduled_query)
models.db.session.commit()
raise result
else:
@@ -415,20 +413,13 @@ def send_aggregated_errors(email_address):
redis_connection.delete(key)
-def on_failure(self, exc, _, args, __, ___):
- notify_of_failure(exc.message, args[-2])
-
-def notify_of_failure(message, scheduled_query_id):
- if scheduled_query_id is None or not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
+def notify_of_failure(message, query):
+ if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
return
- query = models.Query.query.get(scheduled_query_id)
- email_address = query.user.email
- schedule_failures = query.schedule_failures + 1
-
- if email_address and schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
- key = 'aggregated_failures:{}'.format(email_address)
- reporting_will_soon_stop = schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
+ if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
+ key = 'aggregated_failures:{}'.format(query.user.email)
+ reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
redis_connection.lpush(key, json_dumps({
@@ -440,13 +431,13 @@ def notify_of_failure(message, scheduled_query_id):
}))
if not redis_connection.exists('{}:pending'.format(key)):
- send_aggregated_errors.apply_async(args=(email_address,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
+ send_aggregated_errors.apply_async(args=(query.user.email,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
redis_connection.set('{}:pending'.format(key), 1)
redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
-@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True, on_failure=on_failure)
+@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True)
def execute_query(self, query, data_source_id, metadata, user_id=None,
scheduled_query_id=None, is_api_key=False):
if scheduled_query_id is not None:
From 8959688d6a471e5fb77f56eb835af63a548eb02c Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 10:30:30 +0300
Subject: [PATCH 10/58] move failure report to its own module
---
redash/tasks/__init__.py | 1 +
redash/tasks/failure_report.py | 52 ++++++++++++++++++++++++++++++++++
redash/tasks/queries.py | 51 +--------------------------------
3 files changed, 54 insertions(+), 50 deletions(-)
create mode 100644 redash/tasks/failure_report.py
diff --git a/redash/tasks/__init__.py b/redash/tasks/__init__.py
index e5de680381..d172729d8b 100644
--- a/redash/tasks/__init__.py
+++ b/redash/tasks/__init__.py
@@ -1,3 +1,4 @@
from .general import record_event, version_check, send_mail, sync_user_details
from .queries import QueryTask, refresh_queries, refresh_schemas, cleanup_query_results, execute_query
from .alerts import check_alerts_for_query
+from .failure_report import notify_of_failure
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
new file mode 100644
index 0000000000..2c47426876
--- /dev/null
+++ b/redash/tasks/failure_report.py
@@ -0,0 +1,52 @@
+import datetime
+from collections import Counter
+from redash.tasks.general import send_mail
+from redash.worker import celery
+from redash import redis_connection, settings
+from redash.utils import json_dumps, json_loads
+
+@celery.task(name="redash.tasks.send_aggregated_errors")
+def send_aggregated_errors(email_address):
+ key = 'aggregated_failures:{}'.format(email_address)
+ errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
+ occurrences = Counter((e.get('query'), e.get('message')) for e in errors)
+ unique_errors = {(e.get('query'), e.get('message')): e for e in errors}
+
+ html = "We're sorry, but these queries failed lately:
- {}
".format(
+ ''.join(["""
+ Failed at: {failed_at}
+ Failure reason: {failure_reason}
+ Failure count: {failure_count}
+ Query: {query}
+ {comment}""".format(
+ failed_at=v.get('failed_at'),
+ failure_reason=v.get('message'),
+ failure_count=occurrences[k],
+ comment=v.get('comment'),
+ query=v.get('query')) for k, v in unique_errors.iteritems()])
+ )
+ send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
+
+ redis_connection.delete(key)
+
+def notify_of_failure(message, query):
+ if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
+ return
+
+ if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
+ key = 'aggregated_failures:{}'.format(query.user.email)
+ reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
+ comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
+
+ redis_connection.lpush(key, json_dumps({
+ 'id': query.id,
+ 'query': query.query_text,
+ 'message': message,
+ 'comment': comment,
+ 'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
+ }))
+
+ if not redis_connection.exists('{}:pending'.format(key)):
+ send_aggregated_errors.apply_async(args=(query.user.email,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
+ redis_connection.set('{}:pending'.format(key), 1)
+ redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
\ No newline at end of file
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 4e05020d78..7e5b6c08ca 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -1,9 +1,6 @@
import logging
import signal
import time
-import datetime
-from collections import Counter
-
import redis
from celery.exceptions import SoftTimeLimitExceeded, TimeLimitExceeded
from celery.result import AsyncResult
@@ -13,7 +10,7 @@
from redash import models, redis_connection, settings, statsd_client
from redash.query_runner import InterruptException
from redash.tasks.alerts import check_alerts_for_query
-from redash.tasks.general import send_mail
+from redash.tasks.failure_report import notify_of_failure
from redash.utils import gen_query_hash, json_dumps, json_loads, utcnow, mustache_render
from redash.worker import celery
@@ -389,52 +386,6 @@ def _load_data_source(self):
logger.info("task=execute_query state=load_ds ds_id=%d", self.data_source_id)
return models.DataSource.query.get(self.data_source_id)
-@celery.task(name="redash.tasks.send_aggregated_errors")
-def send_aggregated_errors(email_address):
- key = 'aggregated_failures:{}'.format(email_address)
- errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
- occurrences = Counter((e.get('query'), e.get('message')) for e in errors)
- unique_errors = {(e.get('query'), e.get('message')): e for e in errors}
-
- html = "We're sorry, but these queries failed lately:
- {}
".format(
- ''.join(["""
- Failed at: {failed_at}
- Failure reason: {failure_reason}
- Failure count: {failure_count}
- Query: {query}
- {comment}""".format(
- failed_at=v.get('failed_at'),
- failure_reason=v.get('message'),
- failure_count=occurrences[k],
- comment=v.get('comment'),
- query=v.get('query')) for k, v in unique_errors.iteritems()])
- )
- send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
-
- redis_connection.delete(key)
-
-def notify_of_failure(message, query):
- if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
- return
-
- if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
- key = 'aggregated_failures:{}'.format(query.user.email)
- reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
- comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
-
- redis_connection.lpush(key, json_dumps({
- 'id': query.id,
- 'query': query.query_text,
- 'message': message,
- 'comment': comment,
- 'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
- }))
-
- if not redis_connection.exists('{}:pending'.format(key)):
- send_aggregated_errors.apply_async(args=(query.user.email,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
- redis_connection.set('{}:pending'.format(key), 1)
- redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
-
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True)
From d88e5891660fcfb4ab2706e3f9817631325b5dd0 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 10:48:34 +0300
Subject: [PATCH 11/58] indicate that failure count is since last report
---
redash/tasks/failure_report.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 2c47426876..0307575336 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -9,6 +9,7 @@
def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
+ errors.reverse()
occurrences = Counter((e.get('query'), e.get('message')) for e in errors)
unique_errors = {(e.get('query'), e.get('message')): e for e in errors}
@@ -16,7 +17,7 @@ def send_aggregated_errors(email_address):
''.join(["""
Failed at: {failed_at}
Failure reason: {failure_reason}
- Failure count: {failure_count}
+ Failures since last report: {failure_count}
Query: {query}
{comment}""".format(
failed_at=v.get('failed_at'),
@@ -36,7 +37,7 @@ def notify_of_failure(message, query):
if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
key = 'aggregated_failures:{}'.format(query.user.email)
reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
- comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
+ comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} overall failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
redis_connection.lpush(key, json_dumps({
'id': query.id,
From 5adb58e74e049d72c95b1b91bea2d5c75b5f8b47 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 10:56:05 +0300
Subject: [PATCH 12/58] copy changes
---
redash/tasks/failure_report.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 0307575336..8287d6c87d 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -15,7 +15,7 @@ def send_aggregated_errors(email_address):
html = "We're sorry, but these queries failed lately:
- {}
".format(
''.join(["""
- Failed at: {failed_at}
+ Last failure at: {failed_at}
Failure reason: {failure_reason}
Failures since last report: {failure_count}
Query: {query}
@@ -37,7 +37,10 @@ def notify_of_failure(message, query):
if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
key = 'aggregated_failures:{}'.format(query.user.email)
reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
- comment = 'This query is repeatedly failing. Reporting may stop when the query exceeds {} overall failures.'.format(settings.MAX_FAILURE_REPORTS_PER_QUERY) if reporting_will_soon_stop else ''
+ comment = 'This query has failed a total of {failure_count} times. Reporting may stop when the query exceeds {max_failure_reports} overall failures.'.format(
+ failure_count=query.schedule_failures,
+ max_failure_reports=settings.MAX_FAILURE_REPORTS_PER_QUERY
+ ) if reporting_will_soon_stop else ''
redis_connection.lpush(key, json_dumps({
'id': query.id,
From db6086fe8af77b0e73a1ffe7a9404dbcf67d21ab Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 11:04:09 +0300
Subject: [PATCH 13/58] format with <b>
---
redash/tasks/failure_report.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 8287d6c87d..9a531e4a68 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -16,9 +16,9 @@ def send_aggregated_errors(email_address):
html = "We're sorry, but these queries failed lately:
- {}
".format(
'
'.join(["""
Last failure at: {failed_at}
- Failure reason: {failure_reason}
+ Failure reason: {failure_reason}
Failures since last report: {failure_count}
- Query: {query}
+ Query: {query}
{comment}""".format(
failed_at=v.get('failed_at'),
failure_reason=v.get('message'),
From b90d673b6f9cf382ae68431ea9d3536494d3fafb Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 11:49:38 +0300
Subject: [PATCH 14/58] styling, copy and add a link to the query instead of
the query text
---
redash/tasks/failure_report.py | 42 +++++++++++++++++++++-------------
1 file changed, 26 insertions(+), 16 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 9a531e4a68..e971a151b9 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -5,26 +5,35 @@
from redash import redis_connection, settings
from redash.utils import json_dumps, json_loads
+def base_url(org):
+ if settings.MULTI_ORG:
+ return "https://{}/{}".format(settings.HOST, org.slug)
+
+ return settings.HOST
+
@celery.task(name="redash.tasks.send_aggregated_errors")
def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
errors.reverse()
- occurrences = Counter((e.get('query'), e.get('message')) for e in errors)
- unique_errors = {(e.get('query'), e.get('message')): e for e in errors}
-
- html = "We're sorry, but these queries failed lately:
- {}
".format(
- ''.join(["""
- Last failure at: {failed_at}
- Failure reason: {failure_reason}
- Failures since last report: {failure_count}
- Query: {query}
- {comment}""".format(
- failed_at=v.get('failed_at'),
- failure_reason=v.get('message'),
- failure_count=occurrences[k],
- comment=v.get('comment'),
- query=v.get('query')) for k, v in unique_errors.iteritems()])
+ occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
+ unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
+
+ html = "Failed Scheduled Query Executions
{}".format(
+ ''.join(["""
+
+
+ Last failed at: {failed_at} (failed {failure_count} times since last report)
+ Error message: {failure_reason}
+ {comment}
+
""".format(
+ base_url=v.get('base_url'),
+ id=v.get('id'),
+ name=v.get('name'),
+ failed_at=v.get('failed_at'),
+ failure_reason=v.get('message'),
+ failure_count=occurrences[k],
+ comment=v.get('comment')) for k, v in unique_errors.iteritems()])
)
send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
@@ -44,7 +53,8 @@ def notify_of_failure(message, query):
redis_connection.lpush(key, json_dumps({
'id': query.id,
- 'query': query.query_text,
+ 'name': query.name,
+ 'base_url': base_url(query.org),
'message': message,
'comment': comment,
'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
From ac9e8967bb3888aa80beb11eda2881cb814052e0 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 11:52:15 +0300
Subject: [PATCH 15/58] separate reports with <br>
---
redash/tasks/failure_report.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index e971a151b9..49c30e251f 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -20,7 +20,7 @@ def send_aggregated_errors(email_address):
unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
html = "Failed Scheduled Query Executions
{}".format(
- ''.join(["""
+ '
'.join(["""
Last failed at: {failed_at} (failed {failure_count} times since last report)
From 4e1ffe59398884c587e60f85a16e872158386244 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 11:52:31 +0300
Subject: [PATCH 16/58] switch to UTC
---
redash/tasks/failure_report.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 49c30e251f..a8738815c2 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -57,7 +57,7 @@ def notify_of_failure(message, query):
'base_url': base_url(query.org),
'message': message,
'comment': comment,
- 'failed_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
+ 'failed_at': datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
}))
if not redis_connection.exists('{}:pending'.format(key)):
From 66cea9545cada9da0fd55648e2339fad7273c55d Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 11:58:39 +0300
Subject: [PATCH 17/58] move to actual e-mail subject
---
redash/tasks/failure_report.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index a8738815c2..355aa69ed5 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -19,8 +19,7 @@ def send_aggregated_errors(email_address):
occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
- html = "Failed Scheduled Query Executions
{}".format(
- '
'.join(["""
+ html = '
'.join(["""
Last failed at: {failed_at} (failed {failure_count} times since last report)
@@ -34,8 +33,8 @@ def send_aggregated_errors(email_address):
failure_reason=v.get('message'),
failure_count=occurrences[k],
comment=v.get('comment')) for k, v in unique_errors.iteritems()])
- )
- send_mail.delay([email_address], "Uh-oh, Some Scheduled Queries Failed!", html, None)
+
+ send_mail.delay([email_address], "Failed Scheduled Query Executions", html, None)
redis_connection.delete(key)
From 6e20846cc318d8f579b7634bffb49aae491403b9 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:02:01 +0300
Subject: [PATCH 18/58] add explicit message for SoftTimeLimitExceeded
---
redash/tasks/queries.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index 7e5b6c08ca..b6fd31cb8a 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -322,7 +322,11 @@ def run(self):
try:
data, error = query_runner.run_query(annotated_query, self.user)
except Exception as e:
- error = text_type(e)
+ if isinstance(e, SoftTimeLimitExceeded):
+ error = TIMEOUT_MESSAGE
+ else:
+ error = text_type(e)
+
data = None
logging.warning('Unexpected error while running query:', exc_info=1)
From eb8275c6e1d5662975e02b152d957afb5e28ed3f Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:08:54 +0300
Subject: [PATCH 19/58] fix test to use soft time limits
---
tests/tasks/test_queries.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/tasks/test_queries.py b/tests/tasks/test_queries.py
index 758d6e5402..d542c3991e 100644
--- a/tests/tasks/test_queries.py
+++ b/tests/tasks/test_queries.py
@@ -36,7 +36,7 @@ def test_limits_query_time(self, _):
enqueue_query(query.query_text, query.data_source, query.user_id, False, query, {'Username': 'Arik', 'Query ID': query.id})
_, kwargs = execute_query.apply_async.call_args
- self.assertEqual(60, kwargs.get('time_limit'))
+ self.assertEqual(60, kwargs.get('soft_time_limit'))
def test_multiple_enqueue_of_different_query(self):
query = self.factory.create_query()
From 6ba466f41a6d935f8497adc41761e31ace83ef16 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:10:06 +0300
Subject: [PATCH 20/58] default query failure threshold to 100
---
redash/settings/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py
index e3fcbc4a69..6d39a0c82d 100644
--- a/redash/settings/__init__.py
+++ b/redash/settings/__init__.py
@@ -214,7 +214,7 @@ def email_server_is_configured():
SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES = parse_boolean(os.environ.get('REDASH_SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES', 'false'))
SEND_FAILURE_EMAIL_INTERVAL = int(os.environ.get('REDASH_SEND_FAILURE_EMAIL_INTERVAL', 3600))
-MAX_FAILURE_REPORTS_PER_QUERY = int(os.environ.get('REDASH_MAX_FAILURE_REPORTS_PER_QUERY', 5))
+MAX_FAILURE_REPORTS_PER_QUERY = int(os.environ.get('REDASH_MAX_FAILURE_REPORTS_PER_QUERY', 100))
ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE = os.environ.get('REDASH_ALERTS_DEFAULT_MAIL_SUBJECT_TEMPLATE', "({state}) {alert_name}")
From 8767011cffd0e8612c38c4d48388a3a8406dd860 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:17:37 +0300
Subject: [PATCH 21/58] use base_url from utils
---
redash/tasks/failure_report.py | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 355aa69ed5..a4e2c54dfe 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -3,13 +3,7 @@
from redash.tasks.general import send_mail
from redash.worker import celery
from redash import redis_connection, settings
-from redash.utils import json_dumps, json_loads
-
-def base_url(org):
- if settings.MULTI_ORG:
- return "https://{}/{}".format(settings.HOST, org.slug)
-
- return settings.HOST
+from redash.utils import json_dumps, json_loads, base_url
@celery.task(name="redash.tasks.send_aggregated_errors")
def send_aggregated_errors(email_address):
From 045789dff3612dcf2015cfaa66e435f786312db5 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:21:16 +0300
Subject: [PATCH 22/58] newlines. newlines everywhere.
---
redash/tasks/failure_report.py | 11 ++++++++---
redash/tasks/queries.py | 2 ++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index a4e2c54dfe..baaef90575 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -5,6 +5,7 @@
from redash import redis_connection, settings
from redash.utils import json_dumps, json_loads, base_url
+
@celery.task(name="redash.tasks.send_aggregated_errors")
def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
@@ -17,7 +18,9 @@ def send_aggregated_errors(email_address):
Last failed at: {failed_at} (failed {failure_count} times since last report)
- Error message: {failure_reason}
+ Error message:
+ {failure_reason}
+
{comment}
""".format(
base_url=v.get('base_url'),
@@ -32,6 +35,7 @@ def send_aggregated_errors(email_address):
redis_connection.delete(key)
+
def notify_of_failure(message, query):
if not settings.SEND_EMAIL_ON_FAILED_SCHEDULED_QUERIES:
return
@@ -39,7 +43,8 @@ def notify_of_failure(message, query):
if query.schedule_failures < settings.MAX_FAILURE_REPORTS_PER_QUERY:
key = 'aggregated_failures:{}'.format(query.user.email)
reporting_will_soon_stop = query.schedule_failures > settings.MAX_FAILURE_REPORTS_PER_QUERY * 0.75
- comment = 'This query has failed a total of {failure_count} times. Reporting may stop when the query exceeds {max_failure_reports} overall failures.'.format(
+ comment = """This query has failed a total of {failure_count} times.
+ Reporting may stop when the query exceeds {max_failure_reports} overall failures.""".format(
failure_count=query.schedule_failures,
max_failure_reports=settings.MAX_FAILURE_REPORTS_PER_QUERY
) if reporting_will_soon_stop else ''
@@ -56,4 +61,4 @@ def notify_of_failure(message, query):
if not redis_connection.exists('{}:pending'.format(key)):
send_aggregated_errors.apply_async(args=(query.user.email,), countdown=settings.SEND_FAILURE_EMAIL_INTERVAL)
redis_connection.set('{}:pending'.format(key), 1)
- redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
\ No newline at end of file
+ redis_connection.expire('{}:pending'.format(key), settings.SEND_FAILURE_EMAIL_INTERVAL)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index b6fd31cb8a..a23f4148ff 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -17,6 +17,7 @@
logger = get_task_logger(__name__)
TIMEOUT_MESSAGE = "Query exceeded Redash query execution time limit."
+
def _job_lock_id(query_hash, data_source_id):
return "query_hash_job:%s:%s" % (data_source_id, query_hash)
@@ -390,6 +391,7 @@ def _load_data_source(self):
logger.info("task=execute_query state=load_ds ds_id=%d", self.data_source_id)
return models.DataSource.query.get(self.data_source_id)
+
# user_id is added last as a keyword argument for backward compatability -- to support executing previously submitted
# jobs before the upgrade to this version.
@celery.task(name="redash.tasks.execute_query", bind=True, track_started=True)
From 6f3c426482b05d5135ba6ce71a595db4657e397a Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Sun, 19 May 2019 12:31:38 +0300
Subject: [PATCH 23/58] remove redundant import
---
redash/tasks/queries.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py
index a23f4148ff..0178b90341 100644
--- a/redash/tasks/queries.py
+++ b/redash/tasks/queries.py
@@ -11,7 +11,7 @@
from redash.query_runner import InterruptException
from redash.tasks.alerts import check_alerts_for_query
from redash.tasks.failure_report import notify_of_failure
-from redash.utils import gen_query_hash, json_dumps, json_loads, utcnow, mustache_render
+from redash.utils import gen_query_hash, json_dumps, utcnow, mustache_render
from redash.worker import celery
logger = get_task_logger(__name__)
From abe6a36f77766637a63165b6e8b7a3d7aadd1df2 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 21 May 2019 10:14:47 +0300
Subject: [PATCH 24/58] apply new design for failure report
---
redash/tasks/failure_report.py | 51 ++++++++++++++++++++++++++--------
1 file changed, 40 insertions(+), 11 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index baaef90575..1536166c7a 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -14,15 +14,21 @@ def send_aggregated_errors(email_address):
occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
- html = '
'.join(["""
-
-
- Last failed at: {failed_at} (failed {failure_count} times since last report)
- Error message:
- {failure_reason}
-
- {comment}
- """.format(
+ failures_html = ''.join(["""
+
+
+
+
+ Last failed: {failed_at}
+
+ + {failure_count} more failures since last report
+
+
Exception
+
{failure_reason}
+
+
{comment}
+
""".format(
base_url=v.get('base_url'),
id=v.get('id'),
name=v.get('name'),
@@ -31,7 +37,30 @@ def send_aggregated_errors(email_address):
failure_count=occurrences[k],
comment=v.get('comment')) for k, v in unique_errors.iteritems()])
- send_mail.delay([email_address], "Failed Scheduled Query Executions", html, None)
+ html = """
+
+
+
+
+
+
+
+
+
+

+
+
Redash failed to run the following queries:
+ {}
+
+
+
+ """.format(failures_html)
+
+ send_mail.delay([email_address], "Redash failed to execute {} of your queries".format(len(unique_errors.keys())), html, None)
redis_connection.delete(key)
@@ -55,7 +84,7 @@ def notify_of_failure(message, query):
'base_url': base_url(query.org),
'message': message,
'comment': comment,
- 'failed_at': datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
+ 'failed_at': datetime.datetime.utcnow().strftime("%B %d, %I:%M%p UTC")
}))
if not redis_connection.exists('{}:pending'.format(key)):
From a2072b64868730513e5d04197bb83c1c3740fd96 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 21 May 2019 10:33:57 +0300
Subject: [PATCH 25/58] use jinja to format the failure report
---
redash/tasks/failure_report.py | 59 +++++++--------------------
redash/templates/emails/failures.html | 40 ++++++++++++++++++
2 files changed, 54 insertions(+), 45 deletions(-)
create mode 100644 redash/templates/emails/failures.html
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index 1536166c7a..c99fcc2fb2 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -1,5 +1,6 @@
import datetime
from collections import Counter
+from flask import render_template
from redash.tasks.general import send_mail
from redash.worker import celery
from redash import redis_connection, settings
@@ -14,51 +15,19 @@ def send_aggregated_errors(email_address):
occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
- failures_html = ''.join(["""
-
-
-
-
- Last failed: {failed_at}
-
- + {failure_count} more failures since last report
-
-
Exception
-
{failure_reason}
-
-
{comment}
-
""".format(
- base_url=v.get('base_url'),
- id=v.get('id'),
- name=v.get('name'),
- failed_at=v.get('failed_at'),
- failure_reason=v.get('message'),
- failure_count=occurrences[k],
- comment=v.get('comment')) for k, v in unique_errors.iteritems()])
-
- html = """
-
-
-
-
-
-
-
-
-
-

-
-
Redash failed to run the following queries:
- {}
-
-
-
- """.format(failures_html)
+ context = {
+ 'failures': [{
+ 'base_url': v.get('base_url'),
+ 'id': v.get('id'),
+ 'name': v.get('name'),
+ 'failed_at': v.get('failed_at'),
+ 'failure_reason': v.get('message'),
+ 'failure_count': occurrences[k],
+ 'comment': v.get('comment')
+ } for k, v in unique_errors.iteritems()]
+ }
+
+ html = render_template('emails/failures.html', **context)
send_mail.delay([email_address], "Redash failed to execute {} of your queries".format(len(unique_errors.keys())), html, None)
diff --git a/redash/templates/emails/failures.html b/redash/templates/emails/failures.html
new file mode 100644
index 0000000000..8d2d84ad5f
--- /dev/null
+++ b/redash/templates/emails/failures.html
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+

+
+
Redash failed to run the following queries:
+
+ {% for failure in failures %}
+
+
+
+
+ Last failed: {{failure.failed_at}}
+
+ + {{failure.failure_count}} more failures since last report
+
+
Exception
+
{{failure.failure_reason}}
+
+
{{failure.comment}}
+
+ {% endfor %}
+
+
+
+
+
\ No newline at end of file
From 102c1e7e183c2eee98eaf39dae9e347b93b497b5 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 21 May 2019 10:38:36 +0300
Subject: [PATCH 26/58] don't show comment block if no comment is provided
---
redash/templates/emails/failures.html | 31 +++++++++++++++------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/redash/templates/emails/failures.html b/redash/templates/emails/failures.html
index 8d2d84ad5f..59de400f54 100644
--- a/redash/templates/emails/failures.html
+++ b/redash/templates/emails/failures.html
@@ -18,20 +18,23 @@
Redash failed to run the following queries:
{% for failure in failures %}
-
-
-
-
- Last failed: {{failure.failed_at}}
-
- + {{failure.failure_count}} more failures since last report
-
-
Exception
-
{{failure.failure_reason}}
-
-
{{failure.comment}}
-
+
+
+
+
+ Last failed: {{failure.failed_at}}
+
+ + {{failure.failure_count}} more failures since last report
+
+
Exception
+
{{failure.failure_reason}}
+
+
+ {% if failure.comment %}
+
{{failure.comment}}
+ {% endif %}
+
{% endfor %}
From 04458be7c1bd88aa7840763e06320211ac7bc2f1 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 21 May 2019 10:42:05 +0300
Subject: [PATCH 27/58] don't send emails if, for some reason, there are no
available errors
---
redash/tasks/failure_report.py | 40 ++++++++++++++++++----------------
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/redash/tasks/failure_report.py b/redash/tasks/failure_report.py
index c99fcc2fb2..3388148b6e 100644
--- a/redash/tasks/failure_report.py
+++ b/redash/tasks/failure_report.py
@@ -11,25 +11,27 @@
def send_aggregated_errors(email_address):
key = 'aggregated_failures:{}'.format(email_address)
errors = [json_loads(e) for e in redis_connection.lrange(key, 0, -1)]
- errors.reverse()
- occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
- unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
-
- context = {
- 'failures': [{
- 'base_url': v.get('base_url'),
- 'id': v.get('id'),
- 'name': v.get('name'),
- 'failed_at': v.get('failed_at'),
- 'failure_reason': v.get('message'),
- 'failure_count': occurrences[k],
- 'comment': v.get('comment')
- } for k, v in unique_errors.iteritems()]
- }
-
- html = render_template('emails/failures.html', **context)
-
- send_mail.delay([email_address], "Redash failed to execute {} of your queries".format(len(unique_errors.keys())), html, None)
+
+ if errors:
+ errors.reverse()
+ occurrences = Counter((e.get('id'), e.get('message')) for e in errors)
+ unique_errors = {(e.get('id'), e.get('message')): e for e in errors}
+
+ context = {
+ 'failures': [{
+ 'base_url': v.get('base_url'),
+ 'id': v.get('id'),
+ 'name': v.get('name'),
+ 'failed_at': v.get('failed_at'),
+ 'failure_reason': v.get('message'),
+ 'failure_count': occurrences[k],
+ 'comment': v.get('comment')
+ } for k, v in unique_errors.iteritems()]
+ }
+
+ html = render_template('emails/failures.html', **context)
+
+ send_mail.delay([email_address], "Redash failed to execute {} of your queries".format(len(unique_errors.keys())), html, None)
redis_connection.delete(key)
From 4770c4332b129a149d8ab7b706ddd5ef9d1b0263 Mon Sep 17 00:00:00 2001
From: Omer Lachish
Date: Tue, 21 May 2019 10:43:57 +0300
Subject: [PATCH 28/58] subtract 1 from failure count, because the first one is
represented by 'Last failed'
---
redash/templates/emails/failures.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/redash/templates/emails/failures.html b/redash/templates/emails/failures.html
index 59de400f54..e5ec3233fe 100644
--- a/redash/templates/emails/failures.html
+++ b/redash/templates/emails/failures.html
@@ -25,7 +25,7 @@