From a0b160552a12d4ab38ed369fba7129a427afe532 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 24 Jan 2024 11:13:12 +0100 Subject: [PATCH 1/5] Retry perform_notification by telegram with respect to telegram ratelimit countdown --- engine/apps/alerts/tasks/notify_user.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 277cf4b87e..9a0e763563 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -5,6 +5,7 @@ from django.db import transaction from django.utils import timezone from kombu.utils.uuid import uuid as celery_uuid +from telegram.error import RetryAfter from apps.alerts.constants import NEXT_ESCALATION_DELAY from apps.alerts.signals import user_notification_action_triggered_signal @@ -289,7 +290,11 @@ def perform_notification(log_record_pk): phone_backend.notify_by_call(user, alert_group, notification_policy) elif notification_channel == UserNotificationPolicy.NotificationChannel.TELEGRAM: - TelegramToUserConnector.notify_user(user, alert_group, notification_policy) + try: + TelegramToUserConnector.notify_user(user, alert_group, notification_policy) + except RetryAfter as e: + countdown = getattr(e, "retry_after", 3) + perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK: # TODO: refactor checking the possibility of sending a notification in slack From 55da9b9c90442f082024de8da38976ae8e7feb31 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 24 Jan 2024 11:13:30 +0100 Subject: [PATCH 2/5] Add test --- engine/apps/alerts/tests/test_notify_user.py | 35 ++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index e06585b164..22b15d7dd5 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -1,6 +1,7 @@ from unittest.mock import patch import pytest +from telegram.error import RetryAfter from apps.alerts.models import AlertGroup from apps.alerts.tasks.notify_user import notify_user_task, perform_notification @@ -8,6 +9,7 @@ from apps.base.models.user_notification_policy import UserNotificationPolicy from apps.base.models.user_notification_policy_log_record import UserNotificationPolicyLogRecord from apps.slack.models import SlackMessage +from apps.telegram.models import TelegramToUserConnector NOTIFICATION_UNAUTHORIZED_MSG = "notification is not allowed for user" @@ -297,3 +299,36 @@ def test_perform_notification_missing_user_notification_policy_log_record(caplog "The alert group associated with this log record may have been deleted." ) in caplog.text assert f"perform_notification: found record for {invalid_pk}" not in caplog.text + + +@pytest.mark.django_db +def test_perform_notification_telegram_retryafter_error( + make_organization_and_user, + make_user_notification_policy, + make_alert_receive_channel, + make_alert_group, + make_user_notification_policy_log_record, +): + organization, user = make_organization_and_user() + user_notification_policy = make_user_notification_policy( + user=user, + step=UserNotificationPolicy.Step.NOTIFY, + notify_by=UserNotificationPolicy.NotificationChannel.TELEGRAM, + ) + alert_receive_channel = make_alert_receive_channel(organization=organization) + alert_group = make_alert_group(alert_receive_channel=alert_receive_channel) + log_record = make_user_notification_policy_log_record( + author=user, + alert_group=alert_group, + notification_policy=user_notification_policy, + type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED, + ) + countdown = 15 + exc = RetryAfter(countdown) + with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user: + with patch.object(perform_notification, "retry") as mock_perform_notification_retry: + perform_notification(log_record.pk) + + mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy) + mock_perform_notification_retry.assert_called_once_with((log_record.pk,), countdown=countdown, exc=exc) + assert alert_group.personal_log_records.last() == log_record From 2314fae36ec1ecd8b50a97b63c78155560192896 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 24 Jan 2024 11:23:55 +0100 Subject: [PATCH 3/5] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51f7a19c3b..ca254640a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Improved logging during plugin sync and install with Grafana @mderynck ([#3730](https://github.com/grafana/oncall/pull/3730)) +### Fixed + +- Fixed too frequent retry of `perform_notification` task on Telegram ratelimit error by @Ferril ([#3744](https://github.com/grafana/oncall/pull/3744)) + ## v1.3.92 (2024-01-23) Maintenance release From 81131d731d918d28219bb48c7315bf762dc7b323 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 24 Jan 2024 14:16:30 +0100 Subject: [PATCH 4/5] Update retry for `perform_notification` task --- engine/apps/alerts/tasks/notify_user.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py index 9a0e763563..972f2033ca 100644 --- a/engine/apps/alerts/tasks/notify_user.py +++ b/engine/apps/alerts/tasks/notify_user.py @@ -1,6 +1,7 @@ import time from functools import partial +from celery.exceptions import Retry from django.conf import settings from django.db import transaction from django.utils import timezone @@ -235,7 +236,10 @@ def _create_perform_notification_task(log_record_pk, alert_group_pk): @shared_dedicated_queue_retry_task( - autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None + autoretry_for=(Exception,), + retry_backoff=True, + dont_autoretry_for=(Retry,), + max_retries=1 if settings.DEBUG else None, ) def perform_notification(log_record_pk): from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord @@ -294,7 +298,7 @@ def perform_notification(log_record_pk): TelegramToUserConnector.notify_user(user, alert_group, notification_policy) except RetryAfter as e: countdown = getattr(e, "retry_after", 3) - perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) + raise perform_notification.retry((log_record_pk,), countdown=countdown, exc=e) elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK: # TODO: refactor checking the possibility of sending a notification in slack From 6504d6b7d752a0c8b88be3d77ab1b2bc8f3b2eaf Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 24 Jan 2024 14:18:51 +0100 Subject: [PATCH 5/5] fix test --- engine/apps/alerts/tests/test_notify_user.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py index 22b15d7dd5..e0c32d2c14 100644 --- a/engine/apps/alerts/tests/test_notify_user.py +++ b/engine/apps/alerts/tests/test_notify_user.py @@ -326,9 +326,8 @@ def test_perform_notification_telegram_retryafter_error( countdown = 15 exc = RetryAfter(countdown) with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user: - with patch.object(perform_notification, "retry") as mock_perform_notification_retry: + with pytest.raises(RetryAfter): perform_notification(log_record.pk) mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy) - mock_perform_notification_retry.assert_called_once_with((log_record.pk,), countdown=countdown, exc=exc) assert alert_group.personal_log_records.last() == log_record