[Serve] Mark long_running_serve_failure test as stable (#32063)

The long_running_serve_failure release test was marked as unstable due to recent failures. #31945 and #32011 have since resolved the root causes of these failures; after those changes, the test ran successfully for 15+ hours without failure. This change limits the number of iterations the test performs, so that it no longer runs forever, and marks the test as stable.
ray-project · Jan 30, 2023 · b350f8d · b350f8d
1 parent fe729aa
commit b350f8d
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 5 deletions.
diff --git a/release/long_running_tests/workloads/serve_failure.py b/release/long_running_tests/workloads/serve_failure.py
@@ -25,6 +25,8 @@
 
 # RandomTest setup constants
 CPUS_PER_NODE = 10
+NUM_ITERATIONS = 350
+ACTIONS_PER_ITERATION = 20
 
 RAY_UNIT_TEST = "RAY_UNIT_TEST" in os.environ
 
@@ -138,11 +140,10 @@ def verify_deployment(self):
                 time.sleep(0.01)
 
     def run(self):
-        iteration = 0
         start_time = time.time()
         previous_time = start_time
-        while True:
-            for _ in range(20):
+        for iteration in range(NUM_ITERATIONS):
+            for _ in range(ACTIONS_PER_ITERATION):
                 actions, weights = zip(*self.weighted_actions)
                 action_chosen = random.choices(actions, weights=weights)[0]
                 print(f"Executing {action_chosen}")
@@ -166,7 +167,6 @@ def run(self):
                 }
             )
             previous_time = new_time
-            iteration += 1
 
             if RAY_UNIT_TEST:
                 break

diff --git a/release/release_tests.yaml b/release/release_tests.yaml
@@ -2062,7 +2062,7 @@
   group: Long running tests
   working_dir: long_running_tests
 
-  stable: false
+  stable: true
 
   legacy:
     test_name: serve_failure