Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(uptime): Handle uptime region in consumer #82986

Merged: 1 commit merged on Jan 7, 2025.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/sentry/testutils/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -3113,11 +3113,15 @@ def create_uptime_result(
subscription_id: str | None = None,
status: str = CHECKSTATUS_FAILURE,
scheduled_check_time: datetime | None = None,
uptime_region: str | None = "us-west",
) -> CheckResult:
if subscription_id is None:
subscription_id = uuid.uuid4().hex
if scheduled_check_time is None:
scheduled_check_time = datetime.now().replace(microsecond=0)
optional_fields = {}
if uptime_region is not None:
optional_fields["region"] = uptime_region
return {
"guid": uuid.uuid4().hex,
"subscription_id": subscription_id,
Expand All @@ -3132,6 +3136,7 @@ def create_uptime_result(
"actual_check_time_ms": int(datetime.now().replace(microsecond=0).timestamp() * 1000),
"duration_ms": 100,
"request_info": {"request_type": REQUESTTYPE_HEAD, "http_status_code": 500},
**optional_fields,
}


Expand Down
35 changes: 29 additions & 6 deletions src/sentry/uptime/consumers/results_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ def handle_result(self, subscription: UptimeSubscription | None, result: CheckRe
send_uptime_config_deletion(
get_active_region_configs()[0].slug, result["subscription_id"]
)
metrics.incr("uptime.result_processor.subscription_not_found", sample_rate=1.0)
metrics.incr(
"uptime.result_processor.subscription_not_found",
sample_rate=1.0,
tags={"uptime_region": result.get("region", "default")},
)
return

self.check_and_update_regions(subscription)
Expand All @@ -158,6 +162,7 @@ def handle_result_for_project(
metric_tags = {
"status": result["status"],
"mode": ProjectUptimeSubscriptionMode(project_subscription.mode).name.lower(),
"uptime_region": result.get("region", "default"),
}

status_reason = "none"
Expand Down Expand Up @@ -202,12 +207,14 @@ def handle_result_for_project(
result["duration_ms"],
sample_rate=1.0,
unit="millisecond",
tags=metric_tags,
)
metrics.distribution(
"uptime.result_processor.check_result.delay",
result["actual_check_time_ms"] - result["scheduled_check_time_ms"],
sample_rate=1.0,
unit="millisecond",
tags=metric_tags,
)

if project_subscription.mode == ProjectUptimeSubscriptionMode.AUTO_DETECTED_ONBOARDING:
Expand Down Expand Up @@ -253,7 +260,10 @@ def handle_result_for_project_auto_onboarding_mode(
status_reason = result["status_reason"]["type"]
metrics.incr(
"uptime.result_processor.autodetection.failed_onboarding",
tags={"failure_reason": status_reason},
tags={
"failure_reason": status_reason,
"uptime_region": result.get("region", "default"),
},
sample_rate=1.0,
)
logger.info(
Expand Down Expand Up @@ -284,7 +294,9 @@ def handle_result_for_project_auto_onboarding_mode(
)
remove_uptime_subscription_if_unused(onboarding_subscription)
metrics.incr(
"uptime.result_processor.autodetection.graduated_onboarding", sample_rate=1.0
"uptime.result_processor.autodetection.graduated_onboarding",
sample_rate=1.0,
tags={"uptime_region": result.get("region", "default")},
)
logger.info(
"uptime_onboarding_graduated",
Expand Down Expand Up @@ -328,12 +340,19 @@ def handle_result_for_project_active_mode(
metrics.incr(
"uptime.result_processor.restricted_by_provider",
sample_rate=1.0,
tags={"host_provider_id": host_provider_id},
tags={
"host_provider_id": host_provider_id,
"uptime_region": result.get("region", "default"),
},
)

if issue_creation_flag_enabled and not issue_creation_restricted_by_provider:
create_issue_platform_occurrence(result, project_subscription)
metrics.incr("uptime.result_processor.active.sent_occurrence", sample_rate=1.0)
metrics.incr(
"uptime.result_processor.active.sent_occurrence",
tags={"uptime_region": result.get("region", "default")},
sample_rate=1.0,
)
logger.info(
"uptime_active_sent_occurrence",
extra={
Expand All @@ -354,7 +373,11 @@ def handle_result_for_project_active_mode(
"organizations:uptime-create-issues", project_subscription.project.organization
):
resolve_uptime_issue(project_subscription)
metrics.incr("uptime.result_processor.active.resolved", sample_rate=1.0)
metrics.incr(
"uptime.result_processor.active.resolved",
sample_rate=1.0,
tags={"uptime_region": result.get("region", "default")},
)
logger.info(
"uptime_active_resolved",
extra={
Expand Down
72 changes: 68 additions & 4 deletions tests/sentry/uptime/consumers/test_results_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def test(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand All @@ -121,6 +122,7 @@ def test(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand All @@ -135,6 +137,41 @@ def test(self):
self.project_subscription.refresh_from_db()
assert self.project_subscription.uptime_status == UptimeStatus.FAILED

def test_no_uptime_region_default(self):
    """
    Test that a check result without a region is reported under the
    "default" uptime_region metric tag.
    """
    # uptime_region=None asks the fixture to leave the "region" key out of
    # the generated result entirely.
    check_result = self.create_uptime_result(
        self.subscription.subscription_id,
        scheduled_check_time=datetime.now() - timedelta(minutes=5),
        uptime_region=None,
    )
    expected_incr_calls = [
        call(
            "uptime.result_processor.handle_result_for_project",
            tags={
                "status_reason": "timeout",
                "status": "failure",
                "mode": "auto_detected_active",
                "uptime_region": "default",
            },
            sample_rate=1.0,
        ),
        call(
            "uptime.result_processor.active.under_threshold",
            sample_rate=1.0,
            tags={"status": "failure"},
        ),
    ]
    metrics_patcher = mock.patch("sentry.uptime.consumers.results_consumer.metrics")
    threshold_patcher = mock.patch(
        "sentry.uptime.consumers.results_consumer.ACTIVE_FAILURE_THRESHOLD",
        new=2,
    )
    # Context managers are entered in the same order as before: metrics
    # mock, feature flag, then the failure-threshold override.
    with (
        metrics_patcher as mocked_metrics,
        self.feature("organizations:uptime-create-issues"),
        threshold_patcher,
    ):
        self.send_result(check_result)
        mocked_metrics.incr.assert_has_calls(expected_incr_calls)

def test_restricted_host_provider_id(self):
"""
Test that we do NOT create an issue when the host provider identifier
Expand All @@ -160,7 +197,7 @@ def test_restricted_host_provider_id(self):
call(
"uptime.result_processor.restricted_by_provider",
sample_rate=1.0,
tags={"host_provider_id": "TEST"},
tags={"host_provider_id": "TEST", "uptime_region": "us-west"},
),
],
any_order=True,
Expand Down Expand Up @@ -194,6 +231,7 @@ def test_reset_fail_count(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand All @@ -220,6 +258,7 @@ def test_reset_fail_count(self):
"status_reason": "timeout",
"status": "success",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand All @@ -240,6 +279,7 @@ def test_reset_fail_count(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand Down Expand Up @@ -275,6 +315,7 @@ def test_no_create_issues_feature(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
)
Expand Down Expand Up @@ -310,6 +351,7 @@ def test_resolve(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand All @@ -330,6 +372,7 @@ def test_resolve(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand Down Expand Up @@ -361,6 +404,7 @@ def test_resolve(self):
"status_reason": "timeout",
"status": "success",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
)
Expand All @@ -380,7 +424,13 @@ def test_no_subscription(self):
):
self.send_result(result)
metrics.incr.assert_has_calls(
[call("uptime.result_processor.subscription_not_found", sample_rate=1.0)]
[
call(
"uptime.result_processor.subscription_not_found",
tags={"uptime_region": "us-west"},
sample_rate=1.0,
)
]
)
self.assert_producer_calls((subscription_id, kafka_definition.Topic.UPTIME_CONFIGS))

Expand All @@ -403,12 +453,17 @@ def test_skip_already_processed(self):
"status_reason": "timeout",
"status": "failure",
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
call(
"uptime.result_processor.skipping_already_processed_update",
tags={"status": CHECKSTATUS_FAILURE, "mode": "auto_detected_active"},
tags={
"status": CHECKSTATUS_FAILURE,
"mode": "auto_detected_active",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
]
Expand All @@ -434,6 +489,7 @@ def test_missed(self):
"status": CHECKSTATUS_MISSED_WINDOW,
"mode": "auto_detected_active",
"status_reason": "timeout",
"uptime_region": "us-west",
},
sample_rate=1.0,
)
Expand Down Expand Up @@ -470,6 +526,7 @@ def test_onboarding_failure(self):
"status": CHECKSTATUS_FAILURE,
"mode": "auto_detected_onboarding",
"status_reason": "timeout",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand Down Expand Up @@ -503,12 +560,16 @@ def test_onboarding_failure(self):
"status": CHECKSTATUS_FAILURE,
"mode": "auto_detected_onboarding",
"status_reason": "timeout",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
call(
"uptime.result_processor.autodetection.failed_onboarding",
tags={"failure_reason": CHECKSTATUSREASONTYPE_TIMEOUT},
tags={
"failure_reason": CHECKSTATUSREASONTYPE_TIMEOUT,
"uptime_region": "us-west",
},
sample_rate=1.0,
),
]
Expand Down Expand Up @@ -550,6 +611,7 @@ def test_onboarding_success_ongoing(self):
"status_reason": "timeout",
"status": "success",
"mode": "auto_detected_onboarding",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
Expand Down Expand Up @@ -590,11 +652,13 @@ def test_onboarding_success_graduate(self):
"status_reason": "timeout",
"status": "success",
"mode": "auto_detected_onboarding",
"uptime_region": "us-west",
},
sample_rate=1.0,
),
call(
"uptime.result_processor.autodetection.graduated_onboarding",
tags={"uptime_region": "us-west"},
sample_rate=1.0,
),
]
Expand Down
Loading