[BUG] Fix Ray autoscaling from zero worker CPUs (#1884)

Tested on KubeRay
Eventual-Inc · Feb 15, 2024 · b62e9ad
1 parent 7973b32
commit b62e9ad
Showing 1 changed file with 4 additions and 1 deletion.
diff --git a/daft/runners/ray_runner.py b/daft/runners/ray_runner.py
@@ -526,7 +526,10 @@ def place_in_queue(item):
                     while is_active():  # Loop: Dispatch (get tasks -> batch dispatch).
                         tasks_to_dispatch: list[PartitionTask] = []
 
-                        cores: int = max(next(num_cpus_provider) - self.reserved_cores, 0)
+                        # TODO: improve control loop code to be more understandable and dynamically adjust backlog
+                        cores: int = max(
+                            next(num_cpus_provider) - self.reserved_cores, 1
+                        )  # assume at least 1 CPU core for bootstrapping clusters that scale from zero
                         max_inflight_tasks = cores + self.max_task_backlog
                         dispatches_allowed = max_inflight_tasks - len(inflight_tasks)
                         dispatches_allowed = min(cores, dispatches_allowed)