diff --git a/python/ray/serve/tests/test_autoscaling_metrics.py b/python/ray/serve/tests/test_autoscaling_metrics.py
index 1b1eb6e0d8c28..148a6eb871e84 100644
--- a/python/ray/serve/tests/test_autoscaling_metrics.py
+++ b/python/ray/serve/tests/test_autoscaling_metrics.py
@@ -1,8 +1,6 @@
-import time
-
 import ray
 from ray import serve
-from ray._private.test_utils import wait_for_condition
+from ray._private.test_utils import SignalActor, wait_for_condition
 from ray.serve._private.autoscaling_metrics import InMemoryMetricsStore
 from ray.serve._private.common import DeploymentID, ReplicaState
 
@@ -72,6 +70,8 @@ def test_multiple_metrics(self):
 
 
 def test_e2e(serve_instance):
+    signal = SignalActor.remote()
+
     @serve.deployment(
         autoscaling_config={
             "metrics_interval_s": 0.1,
@@ -90,7 +90,7 @@ def test_e2e(serve_instance):
     )
     class A:
         def __call__(self):
-            time.sleep(0.1)
+            ray.get(signal.wait.remote())
 
     handle = serve.run(A.bind())
     dep_id = DeploymentID("A", "default")
@@ -113,11 +113,13 @@ def last_timestamp_value_high():
         metrics = list(data.values())
         assert len(metrics) == 2
         assert metrics[0] > 0 and metrics[1] > 0
-        assert sum(metrics) > 25
+        assert sum(metrics) > 40
         return True
 
     wait_for_condition(last_timestamp_value_high)
     print("Confirmed there are metrics from 2 replicas, and many queries are inflight.")
+    print("Releasing signal.")
+    signal.send.remote()
 
     def check_running_replicas(expected):
         replicas = ray.get(