Skip to content

Commit

Permalink
Retry perform_notification with Telegram ratelimit countdown on `Re…
Browse files Browse the repository at this point in the history
…tryAfter` error (#3744)

# What this PR does
Use Telegram ratelimit countdown when retry `perform_notification` task
on `RetryAfter` error
## Which issue(s) this PR fixes
grafana/oncall-private#2451

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
  • Loading branch information
Ferril authored and iskhakov committed Feb 20, 2024
1 parent ff77795 commit e5a60cd
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Improved logging during plugin sync and install with Grafana @mderynck ([#3730](https://github.com/grafana/oncall/pull/3730))

### Fixed

- Fixed too frequent retry of `perform_notification` task on Telegram ratelimit error by @Ferril ([#3744](https://github.com/grafana/oncall/pull/3744))

## v1.3.92 (2024-01-23)

Maintenance release
Expand Down
13 changes: 11 additions & 2 deletions engine/apps/alerts/tasks/notify_user.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import time
from functools import partial

from celery.exceptions import Retry
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from kombu.utils.uuid import uuid as celery_uuid
from telegram.error import RetryAfter

from apps.alerts.constants import NEXT_ESCALATION_DELAY
from apps.alerts.signals import user_notification_action_triggered_signal
Expand Down Expand Up @@ -234,7 +236,10 @@ def _create_perform_notification_task(log_record_pk, alert_group_pk):


@shared_dedicated_queue_retry_task(
autoretry_for=(Exception,), retry_backoff=True, max_retries=1 if settings.DEBUG else None
autoretry_for=(Exception,),
retry_backoff=True,
dont_autoretry_for=(Retry,),
max_retries=1 if settings.DEBUG else None,
)
def perform_notification(log_record_pk):
from apps.base.models import UserNotificationPolicy, UserNotificationPolicyLogRecord
Expand Down Expand Up @@ -289,7 +294,11 @@ def perform_notification(log_record_pk):
phone_backend.notify_by_call(user, alert_group, notification_policy)

elif notification_channel == UserNotificationPolicy.NotificationChannel.TELEGRAM:
TelegramToUserConnector.notify_user(user, alert_group, notification_policy)
try:
TelegramToUserConnector.notify_user(user, alert_group, notification_policy)
except RetryAfter as e:
countdown = getattr(e, "retry_after", 3)
raise perform_notification.retry((log_record_pk,), countdown=countdown, exc=e)

elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK:
# TODO: refactor checking the possibility of sending a notification in slack
Expand Down
34 changes: 34 additions & 0 deletions engine/apps/alerts/tests/test_notify_user.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from unittest.mock import patch

import pytest
from telegram.error import RetryAfter

from apps.alerts.models import AlertGroup
from apps.alerts.tasks.notify_user import notify_user_task, perform_notification
from apps.api.permissions import LegacyAccessControlRole
from apps.base.models.user_notification_policy import UserNotificationPolicy
from apps.base.models.user_notification_policy_log_record import UserNotificationPolicyLogRecord
from apps.slack.models import SlackMessage
from apps.telegram.models import TelegramToUserConnector

NOTIFICATION_UNAUTHORIZED_MSG = "notification is not allowed for user"

Expand Down Expand Up @@ -297,3 +299,35 @@ def test_perform_notification_missing_user_notification_policy_log_record(caplog
"The alert group associated with this log record may have been deleted."
) in caplog.text
assert f"perform_notification: found record for {invalid_pk}" not in caplog.text


@pytest.mark.django_db
def test_perform_notification_telegram_retryafter_error(
make_organization_and_user,
make_user_notification_policy,
make_alert_receive_channel,
make_alert_group,
make_user_notification_policy_log_record,
):
organization, user = make_organization_and_user()
user_notification_policy = make_user_notification_policy(
user=user,
step=UserNotificationPolicy.Step.NOTIFY,
notify_by=UserNotificationPolicy.NotificationChannel.TELEGRAM,
)
alert_receive_channel = make_alert_receive_channel(organization=organization)
alert_group = make_alert_group(alert_receive_channel=alert_receive_channel)
log_record = make_user_notification_policy_log_record(
author=user,
alert_group=alert_group,
notification_policy=user_notification_policy,
type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_TRIGGERED,
)
countdown = 15
exc = RetryAfter(countdown)
with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user:
with pytest.raises(RetryAfter):
perform_notification(log_record.pk)

mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy)
assert alert_group.personal_log_records.last() == log_record

0 comments on commit e5a60cd

Please sign in to comment.