Skip to content

Commit

Permalink
Update FailedRunsNotificationCronJob to report more clearly
Browse files Browse the repository at this point in the history
  • Loading branch information
evenicoulddoit committed Nov 30, 2016
1 parent 518965a commit c1e3614
Show file tree
Hide file tree
Showing 6 changed files with 266 additions and 54 deletions.
106 changes: 76 additions & 30 deletions django_cron/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,94 @@

class FailedRunsNotificationCronJob(CronJobBase):
"""
Send email if cron failed to run X times in a row
A regular job to send email reports for failed Cron jobs.
The job log is used to check for all unreported failures for each job
classes specified within the CRON_CLASSES dictionary. When the number of
failures for each job type exceeds the limit (which can be specified
either per-job or project wide) an email is sent to all relevant parties
detailing the error.
"""
RUN_EVERY_MINS = 30
RUN_EVERY_MINS = 0

schedule = Schedule(run_every_mins=RUN_EVERY_MINS)
code = 'django_cron.FailedRunsNotificationCronJob'

def do(self):
self.config = self.get_config()
cron_classes = [get_class(c_name) for c_name in settings.CRON_CLASSES]
cron_classes = [c for c in cron_classes if not isinstance(self, c)]

for cron_cls in cron_classes:
self.check_for_failures(cron_cls)

def get_config(self):
"""
Combine the default configuration with any project-specific ones.
"""
defaults = dict(
FAILED_RUNS_CRONJOB_EMAIL_PREFIX='[Cron Failure] - ',
CRON_MIN_NUM_FAILURES=10,
CRON_FAILURE_FROM_EMAIL=settings.DEFAULT_FROM_EMAIL,
CRON_FAILURE_EMAIL_RECIPIENTS=[
email for _, email in settings.ADMINS
]
)
return {
key: getattr(settings, key, defaults[key])
for key in defaults
}

def check_for_failures(self, cron_cls):
"""
Check the given Cron task for failed jobs, and report if required.
"""
min_failures = getattr(
cron_cls, 'MIN_NUM_FAILURES', self.config['CRON_MIN_NUM_FAILURES']
)

CRONS_TO_CHECK = map(lambda x: get_class(x), settings.CRON_CLASSES)
EMAILS = [admin[1] for admin in settings.ADMINS]
failed_jobs = CronJobLog.objects.filter(
code=cron_cls.code, is_success=False, failure_reported=False
)

try:
FAILED_RUNS_CRONJOB_EMAIL_PREFIX = settings.FAILED_RUNS_CRONJOB_EMAIL_PREFIX
except:
FAILED_RUNS_CRONJOB_EMAIL_PREFIX = ''
if failed_jobs.count() < min_failures:
return

for cron in CRONS_TO_CHECK:
self.report_failure(cron_cls, failed_jobs)
failed_jobs.update(failure_reported=True)

try:
min_failures = cron.MIN_NUM_FAILURES
except AttributeError:
min_failures = 10
def report_failure(self, cron_cls, failed_jobs):
"""
Report the failed jobs by sending an email (using django-common).
"""
send_mail(**self.get_send_mail_kwargs(cron_cls, failed_jobs))

failures = 0
def get_send_mail_kwargs(self, cron_cls, failed_jobs):
"""
Return the arguments to pass to send_mail for the given failed jobs.
"""
failed_reports = []

jobs = CronJobLog.objects.filter(code=cron.code).order_by('-end_time')[:min_failures]
for job in failed_jobs:
failed_reports.append(
u"Job ran at {start_time}:\n{message}"
.format(start_time=job.start_time, message=job.message)
)

message = ''
divider = "\n\n{0}\n\n".format("=" * 80)
message = divider.join(failed_reports)
subject = "{prefix}{code} failed".format(
prefix=self.config['FAILED_RUNS_CRONJOB_EMAIL_PREFIX'],
code=cron_cls.code
)

for job in jobs:
if not job.is_success:
failures += 1
message += 'Job ran at %s : \n\n %s \n\n' % (job.start_time, job.message)
if len(failed_reports) > 1:
subject = "{subject} {times} times".format(
subject=subject, times=len(failed_reports)
)

if failures == min_failures:
send_mail(
'%s%s failed %s times in a row!' % (
FAILED_RUNS_CRONJOB_EMAIL_PREFIX,
cron.code,
min_failures
),
message,
settings.DEFAULT_FROM_EMAIL, EMAILS
)
return dict(
subject=subject, message=message,
from_email=self.config['CRON_FAILURE_FROM_EMAIL'],
recipient_emails=self.config['CRON_FAILURE_EMAIL_RECIPIENTS']
)
30 changes: 30 additions & 0 deletions django_cron/migrations/0003_cronjoblog_failure_reported.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.3 on 2016-11-29 14:35
from __future__ import unicode_literals

from django.db import migrations, models


def set_old_logs_as_reported(apps, *args, **kwargs):
CronJobLog = apps.get_model('django_cron', 'CronJobLog')
CronJobLog.objects.update(failure_reported=True)


def noop(*args, **kwargs):
pass


class Migration(migrations.Migration):

dependencies = [
('django_cron', '0002_remove_max_length_from_CronJobLog_message'),
]

operations = [
migrations.AddField(
model_name='cronjoblog',
name='failure_reported',
field=models.BooleanField(default=False),
),
migrations.RunPython(set_old_logs_as_reported, reverse_code=noop)
]
1 change: 1 addition & 0 deletions django_cron/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class CronJobLog(models.Model):
start_time = models.DateTimeField(db_index=True)
end_time = models.DateTimeField(db_index=True)
is_success = models.BooleanField(default=False)
failure_reported = models.BooleanField(default=False)
message = models.TextField(default='', blank=True) # TODO: db_index=True

"""
Expand Down
135 changes: 119 additions & 16 deletions django_cron/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@
from time import sleep
from datetime import timedelta

from django import db
from mock import patch
from freezegun import freeze_time

from django import db
from django.test import TransactionTestCase
from django.core.management import call_command
from django.test.utils import override_settings
from django.test.client import Client
from django.core.urlresolvers import reverse
from django.contrib.auth.models import User

from freezegun import freeze_time

from django_cron.cron import FailedRunsNotificationCronJob
from django_cron.helpers import humanize_duration
from django_cron.models import CronJobLog
from test_crons import TestErrorCronJob


class OutBuffer(object):
Expand All @@ -34,7 +36,9 @@ def str_content(self):
return self._str_cache


class TestCase(TransactionTestCase):
class DjangoCronTestCase(TransactionTestCase):
def setUp(self):
CronJobLog.objects.all().delete()

success_cron = 'test_crons.TestSucessCronJob'
error_cron = 'test_crons.TestErrorCronJob'
Expand All @@ -44,9 +48,8 @@ class TestCase(TransactionTestCase):
does_not_exist_cron = 'ThisCronObviouslyDoesntExist'
test_failed_runs_notification_cron = 'django_cron.cron.FailedRunsNotificationCronJob'

def setUp(self):
CronJobLog.objects.all().delete()

class BaseTests(DjangoCronTestCase):
def test_success_cron(self):
logs_count = CronJobLog.objects.all().count()
call_command('runcrons', self.success_cron, force=True)
Expand Down Expand Up @@ -157,16 +160,6 @@ def test_cache_locking_backend(self):
# t.join(10)
# self.assertEqual(CronJobLog.objects.all().count(), logs_count + 1)

def test_failed_runs_notification(self):
CronJobLog.objects.all().delete()
logs_count = CronJobLog.objects.all().count()

for i in range(10):
call_command('runcrons', self.error_cron, force=True)
call_command('runcrons', self.test_failed_runs_notification_cron)

self.assertEqual(CronJobLog.objects.all().count(), logs_count + 11)

def test_humanize_duration(self):
test_subjects = (
(timedelta(days=1, hours=1, minutes=1, seconds=1), '1 day, 1 hour, 1 minute, 1 second'),
Expand All @@ -180,3 +173,113 @@ def test_humanize_duration(self):
humanize_duration(duration),
humanized
)


class FailureReportTests(DjangoCronTestCase):
"""
Unit tests for the FailedRunsNotificationCronJob.
"""
def _error_cron(self):
call_command('runcrons', self.error_cron, force=True)

def _report_cron(self):
call_command(
'runcrons', self.test_failed_runs_notification_cron,
force=True
)

def _error_and_report(self):
self._error_cron()
self._report_cron()

def _resolve_reported_failures(self, cron_cls, failed_jobs):
"""
Resolve the failed jobs passed to the notifier's report_failure().
This allows us to assert the jobs passed given that failed jobs is a
queryset which shouldn't match any instances after the notifier runs
as it should make all log entries as having been reported.
"""
self.reported_cls = cron_cls
self.reported_jobs = set(failed_jobs)

@patch.object(FailedRunsNotificationCronJob, 'report_failure')
def test_failed_notifications(self, mock_report):
"""
By default, the user should be notified after 10 job failures.
"""
mock_report.side_effect = self._resolve_reported_failures

for _ in range(9):
self._error_and_report()
self.assertEquals(0, mock_report.call_count)

# The tenth error triggers the report
self._error_and_report()
self.assertEqual(1, mock_report.call_count)

# The correct job class and entries should be included
self.assertEquals(TestErrorCronJob, self.reported_cls)
self.assertEquals(
set(CronJobLog.objects.filter(code=TestErrorCronJob.code)),
self.reported_jobs
)

@patch.object(FailedRunsNotificationCronJob, 'report_failure')
@override_settings(CRON_MIN_NUM_FAILURES=1)
def test_settings_can_override_number_of_failures(self, mock_report):
mock_report.side_effect = self._resolve_reported_failures
self._error_and_report()
self.assertEqual(1, mock_report.call_count)

@patch.object(FailedRunsNotificationCronJob, 'report_failure')
@override_settings(CRON_MIN_NUM_FAILURES=1)
def test_logs_all_unreported(self, mock_report):
mock_report.side_effect = self._resolve_reported_failures
self._error_cron()
self._error_and_report()
self.assertEqual(1, mock_report.call_count)
self.assertEqual(2, len(self.reported_jobs))

@patch.object(FailedRunsNotificationCronJob, 'report_failure')
@override_settings(CRON_MIN_NUM_FAILURES=1)
def test_only_logs_failures(self, mock_report):
mock_report.side_effect = self._resolve_reported_failures
call_command('runcrons', self.success_cron, force=True)
self._error_and_report()
self.assertEqual(
self.reported_jobs,
{CronJobLog.objects.get(code=TestErrorCronJob.code)}
)

@patch.object(FailedRunsNotificationCronJob, 'report_failure')
@override_settings(CRON_MIN_NUM_FAILURES=1)
def test_only_reported_once(self, mock_report):
mock_report.side_effect = self._resolve_reported_failures
self._error_and_report()
self.assertEqual(1, mock_report.call_count)

# Calling the notifier for a second time doesn't report a second time
self._report_cron()
self.assertEqual(1, mock_report.call_count)

@patch('django_cron.cron.send_mail')
@override_settings(
CRON_MIN_NUM_FAILURES=1,
CRON_FAILURE_FROM_EMAIL='from@email.com',
CRON_FAILURE_EMAIL_RECIPIENTS=['foo@bar.com', 'x@y.com'],
FAILED_RUNS_CRONJOB_EMAIL_PREFIX='ERROR!!!'
)
def test_uses_send_mail(self, mock_send_mail):
"""
Test that django_common is used to send the email notifications.
"""
self._error_and_report()
self.assertEquals(1, mock_send_mail.call_count)
kwargs = mock_send_mail.call_args[1]

self.assertIn('ERROR!!!', kwargs['subject'])
self.assertEquals('from@email.com', kwargs['from_email'])
self.assertEquals(
['foo@bar.com', 'x@y.com'], kwargs['recipient_emails']
)
Loading

0 comments on commit c1e3614

Please sign in to comment.