diff --git a/python/ray/serve/tests/test_autoscaling_metrics.py b/python/ray/serve/tests/test_autoscaling_metrics.py
index 1b1eb6e0d8c28..148a6eb871e84 100644
--- a/python/ray/serve/tests/test_autoscaling_metrics.py
+++ b/python/ray/serve/tests/test_autoscaling_metrics.py
@@ -1,8 +1,6 @@
-import time
-
 import ray
 from ray import serve
-from ray._private.test_utils import wait_for_condition
+from ray._private.test_utils import SignalActor, wait_for_condition
 from ray.serve._private.autoscaling_metrics import InMemoryMetricsStore
 from ray.serve._private.common import DeploymentID, ReplicaState
 
@@ -72,6 +70,8 @@ def test_multiple_metrics(self):
 
 
 def test_e2e(serve_instance):
+    signal = SignalActor.remote()
+
     @serve.deployment(
         autoscaling_config={
             "metrics_interval_s": 0.1,
@@ -90,7 +90,7 @@ def test_e2e(serve_instance):
     )
     class A:
         def __call__(self):
-            time.sleep(0.1)
+            ray.get(signal.wait.remote())
 
     handle = serve.run(A.bind())
     dep_id = DeploymentID("A", "default")
@@ -113,11 +113,13 @@ def last_timestamp_value_high():
         metrics = list(data.values())
         assert len(metrics) == 2
         assert metrics[0] > 0 and metrics[1] > 0
-        assert sum(metrics) > 25
+        assert sum(metrics) > 40
         return True
 
     wait_for_condition(last_timestamp_value_high)
     print("Confirmed there are metrics from 2 replicas, and many queries are inflight.")
+    print("Releasing signal.")
+    signal.send.remote()
 
     def check_running_replicas(expected):
         replicas = ray.get(