Skip to content

Commit 7e05314

Browse files
authored
[serve] deflake test_autoscaling_policy_with_metr_disab (#54458)
Deflake `test_autoscaling_policy_with_metr_disab.py::TestAutoscalingMetrics::test_basic`. When `RAY_SERVE_COLLECT_AUTOSCALING_METRICS_ON_HANDLE=0`, we collect ongoing request metrics at the replica and queued request metrics at the handle. However, ongoing request metrics are updated very quickly, while queued request metrics are only sent every 10s. Because of this delay, before the queued request metrics are flushed, almost every request is double counted (once as ongoing at the replica and once as queued at the handle), so the total number of ongoing requests climbs to almost 100. Example: https://buildkite.com/ray-project/postmerge/builds/11322#0197eaca-62e1-457d-947b-a981210e98b9/177-852 Note that we send exactly 50 requests and expect the number of replicas to scale to exactly 5. However, the metrics grow above 50 here, almost to 100, which causes the test to be flaky or fail. This PR sets the env var `RAY_SERVE_HANDLE_METRIC_PUSH_INTERVAL_S=0.1` so that queued request metrics are cleared out quickly, and pairs it with other stabilizing changes to the test: raising `downscale_delay_s` from 0 to 5, removing `graceful_shutdown_timeout_s=1`, and waiting for the handle's queued request count to reach 0 before checking that the deployment scaled to 5 replicas. Signed-off-by: Cindy Zhang <cindyzyx9@gmail.com>
1 parent 6e30704 commit 7e05314

File tree

2 files changed

+22
-11
lines changed

2 files changed

+22
-11
lines changed

python/ray/serve/tests/BUILD

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,11 @@ py_test(
291291
name = "test_autoscaling_policy_with_metr_disab",
292292
size = "large",
293293
srcs = ["test_autoscaling_policy.py"],
294-
env = {"RAY_SERVE_COLLECT_AUTOSCALING_METRICS_ON_HANDLE": "0"},
294+
env = {
295+
"RAY_SERVE_COLLECT_AUTOSCALING_METRICS_ON_HANDLE": "0",
296+
# Make sure queued metrics are cleared out quickly.
297+
"RAY_SERVE_HANDLE_METRIC_PUSH_INTERVAL_S": "0.1",
298+
},
295299
main = "test_autoscaling_policy.py",
296300
tags = [
297301
"autoscaling",

python/ray/serve/tests/test_autoscaling_policy.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ def get_deployment_start_time(controller: ServeController, name: str):
5858
return deployment_info.start_time_ms
5959

6060

61+
def check_num_queued_requests_eq(handle: DeploymentHandle, expected: int):
62+
assert (
63+
handle._router._asyncio_router._metrics_manager.num_queued_requests == expected
64+
)
65+
return True
66+
67+
6168
def assert_no_replicas_deprovisioned(
6269
replica_ids_1: Iterable[ReplicaID], replica_ids_2: Iterable[ReplicaID]
6370
) -> None:
@@ -135,12 +142,9 @@ def test_basic(self, serve_instance):
135142
"max_replicas": 10,
136143
"target_ongoing_requests": 10,
137144
"upscale_delay_s": 0,
138-
"downscale_delay_s": 0,
145+
"downscale_delay_s": 5,
139146
"look_back_period_s": 1,
140147
},
141-
# We will send many requests. This will make sure replicas are
142-
# killed quickly during cleanup.
143-
graceful_shutdown_timeout_s=1,
144148
max_ongoing_requests=25,
145149
version="v1",
146150
)
@@ -154,24 +158,27 @@ async def __call__(self):
154158

155159
# Wait for metrics to propagate
156160
wait_for_condition(check_num_requests_ge, client=client, id=dep_id, expected=1)
157-
print("Autoscaling metrics started recording on controller.")
161+
tlog("Autoscaling metrics started recording on controller.")
158162

159163
# Many queries should be inflight.
160164
wait_for_condition(check_num_requests_ge, client=client, id=dep_id, expected=45)
161-
print("Confirmed many queries are inflight.")
165+
tlog("Confirmed many queries are inflight.")
166+
167+
wait_for_condition(check_num_queued_requests_eq, handle=handle, expected=0)
168+
tlog("Confirmed all requests are assigned to replicas.")
162169

163170
wait_for_condition(check_num_replicas_eq, name="A", target=5)
164-
print("Confirmed deployment scaled to 5 replicas.")
165-
print("Releasing signal.")
171+
tlog("Confirmed deployment scaled to 5 replicas.")
172+
tlog("Releasing signal.")
166173
signal.send.remote()
167174

168175
# After traffic stops, num replica should drop to 1
169176
wait_for_condition(check_num_replicas_eq, name="A", target=1, timeout=15)
170-
print("Num replicas dropped to 1.")
177+
tlog("Num replicas dropped to 1.")
171178

172179
# Request metrics should drop to 0
173180
wait_for_condition(check_num_requests_eq, client=client, id=dep_id, expected=0)
174-
print("Queued and ongoing requests dropped to 0.")
181+
tlog("Queued and ongoing requests dropped to 0.")
175182

176183
@pytest.mark.parametrize("use_generator", [True, False])
177184
def test_replicas_die(self, serve_instance_with_signal, use_generator):

0 commit comments

Comments
 (0)