Refactor create_alert task #3759

Merged 5 commits on Jan 30, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add check whether organization has Slack connection on update Slack related field using public api endpoints
by @Ferril ([#3751](https://github.com/grafana/oncall/pull/3751))
- Fixed calculating the number of on-call users per team by @Ferril ([#3773](https://github.com/grafana/oncall/pull/3773))
- Refactor create_alert task by @iskhakov ([#3759](https://github.com/grafana/oncall/pull/3759))

## v1.3.92 (2024-01-23)

54 changes: 31 additions & 23 deletions engine/apps/alerts/models/alert.py
@@ -12,6 +12,8 @@
from apps.alerts import tasks
from apps.alerts.constants import TASK_DELAY_SECONDS
from apps.alerts.incident_appearance.templaters import TemplateLoader
from apps.alerts.signals import alert_group_escalation_snapshot_built
from apps.alerts.tasks.distribute_alert import send_alert_create_signal
from apps.labels.alert_group_labels import assign_labels
from common.jinja_templater import apply_jinja_template
from common.jinja_templater.apply_jinja_template import JinjaTemplateError, JinjaTemplateWarning
@@ -102,28 +104,16 @@ def create(
        if channel_filter is None:
            channel_filter = ChannelFilter.select_filter(alert_receive_channel, raw_request_data, force_route_id)

        # Get or create group
        group, group_created = AlertGroup.objects.get_or_create_grouping(
            channel=alert_receive_channel,
            channel_filter=channel_filter,
            group_data=group_data,
            received_at=received_at,
        )
        logger.debug(f"alert group {group.pk} created={group_created}")

        if group_created:
            assign_labels(group, alert_receive_channel, raw_request_data)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED)

        mark_as_resolved = (
            enable_autoresolve and group_data.is_resolve_signal and alert_receive_channel.allow_source_based_resolving
        )
        if not group.resolved and mark_as_resolved:
            group.resolve_by_source()

        mark_as_acknowledged = group_data.is_acknowledge_signal
        if not group.acknowledged and mark_as_acknowledged:
            group.acknowledge_by_source()

        # Create alert
Comment from the PR author:
Moved alert creation and group creation together
TODO: consider adding transaction
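
For illustration only, not part of this diff: a minimal sketch of what the suggested transaction could look like, assuming the group and alert writes are the two statements that need to be atomic (the helper and its callables are hypothetical):

    from functools import partial

    from django.db import transaction


    def create_group_and_alert(make_group, make_alert, enqueue_signal_task):
        # Hypothetical sketch of the TODO above: both writes happen inside one atomic
        # block, and the follow-up task is only enqueued after the transaction commits,
        # so it can never observe an uncommitted alert row.
        with transaction.atomic():
            group, group_created = make_group()       # e.g. AlertGroup.objects.get_or_create_grouping(...)
            alert = make_alert(group, group_created)  # e.g. cls(..., group=group); alert.save()
            transaction.on_commit(partial(enqueue_signal_task, alert.pk))
        return alert, group_created

Callbacks registered with transaction.on_commit inside an atomic block run only after the outermost transaction commits, which matches how send_alert_create_signal is deferred in the new code below.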

        alert = cls(
            is_resolve_signal=group_data.is_resolve_signal,
            title=title,
@@ -135,21 +125,39 @@
            raw_request_data=raw_request_data,
            is_the_first_alert_in_group=group_created,
        )

        alert.save()
        logger.debug(f"alert {alert.pk} created")

        transaction.on_commit(partial(send_alert_create_signal.apply_async, (alert.pk,)))

        if group_created:
            assign_labels(group, alert_receive_channel, raw_request_data)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_REGISTERED)
            group.log_records.create(type=AlertGroupLogRecord.TYPE_ROUTE_ASSIGNED)

        if group_created or alert.group.pause_escalation:
            # Build escalation snapshot if needed and start escalation
            alert.group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS)
Comment on lines +138 to +140 (@iskhakov, Jan 26, 2024):
Start escalation here
(instead of checking if it is the first alert in distribute alert task https://github.com/grafana/oncall/pull/3759/files#diff-e0f70dd85cf6da1d72318bfb28759b3ebf069a28c16717ac233bf760df488a55R31)


        if group_created:
            # TODO: consider moving to start_escalation_if_needed
            alert_group_escalation_snapshot_built.send(sender=cls.__class__, alert_group=alert.group)

        mark_as_acknowledged = group_data.is_acknowledge_signal
        if not group.acknowledged and mark_as_acknowledged:
            group.acknowledge_by_source()

        mark_as_resolved = (
            enable_autoresolve and group_data.is_resolve_signal and alert_receive_channel.allow_source_based_resolving
        )
        if not group.resolved and mark_as_resolved:
            group.resolve_by_source()

        # Store exact alert which resolved group.
        if group.resolved_by == AlertGroup.SOURCE and group.resolved_by_alert is None:
            group.resolved_by_alert = alert
            group.save(update_fields=["resolved_by_alert"])

        if settings.DEBUG:
            tasks.distribute_alert(alert.pk)
        else:
            transaction.on_commit(
                partial(tasks.distribute_alert.apply_async, (alert.pk,), countdown=TASK_DELAY_SECONDS)
            )
Comment from the PR author:
We don't need this task anymore
TODO: don't forget to change alerts and dashboards


        if group_created:
            # all code below related to maintenance mode
            maintenance_uuid = None
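Aside, not part of the diff: alert_group_escalation_snapshot_built, imported and .send()-ed above, is an ordinary Django signal. A generic sketch of how such a signal is declared and consumed (the receiver below is hypothetical; the real receivers live elsewhere in the codebase):

    import django.dispatch

    # Declaration: signals are plain module-level objects.
    alert_group_escalation_snapshot_built = django.dispatch.Signal()


    # A hypothetical receiver; anything connected this way runs synchronously
    # when Alert.create calls .send(sender=..., alert_group=...).
    @django.dispatch.receiver(alert_group_escalation_snapshot_built)
    def on_escalation_snapshot_built(sender, alert_group, **kwargs):
        ...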
2 changes: 2 additions & 0 deletions engine/apps/alerts/tasks/distribute_alert.py
@@ -13,6 +13,8 @@
def distribute_alert(alert_id):
    """
    We need this task to make task processing async and to make sure the task is delivered.
    This task is not used anymore, but we keep it for the tasks in the queue to be processed.
    TODO: remove this task after all the tasks in the queue are processed.
    """
    from apps.alerts.models import Alert

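The docstring above describes a common Celery rollout pattern: stop enqueueing a task but keep its definition until already-queued messages drain. A generic sketch of that pattern (names are illustrative, not from this repo):

    from celery import shared_task


    @shared_task
    def legacy_task(*args, **kwargs):
        # Keeping the task registered means messages already sitting in the broker
        # under this name are still consumed instead of failing with
        # "Received unregistered task"; the body can keep the old logic or be a no-op.
        pass

Once queue metrics show no remaining messages for the old task name, the definition, together with any alerts or dashboards keyed on it (per the TODO in the review comment above), can be removed.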
7 changes: 3 additions & 4 deletions engine/apps/alerts/tests/test_alert.py
@@ -8,9 +8,9 @@


@pytest.mark.django_db
@patch("apps.alerts.tasks.distribute_alert.distribute_alert.apply_async", return_value=None)
@patch("apps.alerts.tasks.distribute_alert.send_alert_create_signal.apply_async", return_value=None)
def test_alert_create_default_channel_filter(
    mocked_distribute_alert_task,
    mocked_send_alert_create_signal,
    make_organization,
    make_alert_receive_channel,
    make_channel_filter,
@@ -30,10 +30,9 @@ def test_alert_create_default_channel_filter(
        image_url=None,
        link_to_upstream_details=None,
    )

    assert alert.group.channel_filter == channel_filter
    assert len(callbacks) == 1
    mocked_distribute_alert_task.assert_called_once_with((alert.pk,), countdown=1)
    mocked_send_alert_create_signal.assert_called_once_with((alert.pk,))


@pytest.mark.django_db
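The assertion on len(callbacks) in the hunk above relies on capturing transaction.on_commit hooks during the test; the capturing context is not visible in this hunk, but the usual pytest-django pattern looks roughly like this (the helper being called is hypothetical):

    import pytest


    @pytest.mark.django_db
    def test_on_commit_callback_is_registered(django_capture_on_commit_callbacks):
        # Everything registered via transaction.on_commit inside the block is
        # collected into `callbacks` instead of running immediately.
        with django_capture_on_commit_callbacks(execute=False) as callbacks:
            create_alert_somehow()  # hypothetical stand-in for Alert.create(...)

        assert len(callbacks) == 1  # exactly one deferred task was scheduled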
6 changes: 3 additions & 3 deletions engine/apps/integrations/tasks.py
@@ -63,8 +63,8 @@ def create_alertmanager_alerts(alert_receive_channel_pk, alert, is_demo=False, f
    alert.group.active_resolve_calculation_id = task.id
    alert.group.save(update_fields=["active_resolve_calculation_id"])

    logger.info(
        f"Created alert alert_id={alert.pk} alert_group_id={alert.group.pk} channel_id={alert_receive_channel.pk}"
    logger.debug(
        f"Created alertmanager alert alert_id={alert.pk} alert_group_id={alert.group.pk} channel_id={alert_receive_channel.pk}"
    )


@@ -109,7 +109,7 @@ def create_alert(
            is_demo=is_demo,
            received_at=received_at,
        )
        logger.info(
        logger.debug(
            f"Created alert alert_id={alert.pk} alert_group_id={alert.group.pk} channel_id={alert_receive_channel.pk}"
        )
    except ConcurrentUpdateError:
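The hunk above ends at an except ConcurrentUpdateError handler whose body is collapsed; a generic sketch of the retry-on-conflict pattern such a handler typically implements (hypothetical code, not the repo's actual body):

    from celery import shared_task


    class ConcurrentUpdateError(Exception):
        """Placeholder standing in for the repo's exception of the same name."""


    def write_alert(payload):
        ...  # hypothetical: performs the actual Alert.create(...) work


    @shared_task(bind=True, max_retries=5)
    def create_alert_task(self, payload):
        try:
            write_alert(payload)
        except ConcurrentUpdateError:
            # Another worker touched the same alert group concurrently; back off
            # briefly and let Celery re-run the task.
            raise self.retry(countdown=2)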