ray-project · bveeramani · Aug 2, 2024 · Jul 18, 2024 · Jul 19, 2024 · Jul 19, 2024
@@ -185,6 +185,7 @@ def __init__(
         self._in_task_submission_backpressure = False
         self._metrics = OpRuntimeMetrics(self)
         self._estimated_num_output_bundles = None
+        self._estimated_output_num_rows = None
         self._execution_completed = False
 
     def __reduce__(self):
@@ -276,6 +277,19 @@ def num_outputs_total(self) -> Optional[int]:
         """
         return self._estimated_num_output_bundles
 
+    def num_output_rows_total(self) -> Optional[int]:
+        """Returns the total number of output rows of this operator,
+        or ``None`` if unable to provide a reasonable estimate (for example,
+        if no tasks have finished yet).
+
+        The value returned may be an estimate based on the consumption so far.
+        This is useful for reporting progress.
+
+        This base implementation returns ``self._estimated_output_num_rows``.
+        Subclasses should either override this method or update that attribute.
+        """
+        return self._estimated_output_num_rows
+
     def start(self, options: ExecutionOptions) -> None:
         """Called by the executor when execution starts for an operator.
 
@@ -459,8 +473,3 @@ def implements_accurate_memory_accounting(self) -> bool:
     def supports_fusion(self) -> bool:
         """Returns ```True``` if this operator can be fused with other operators."""
         return False
-
-    @property
-    def estimated_output_num_rows(self) -> Optional[int]:
-        """Return the estimated number of output rows for this operator."""
-        return None
@@ -116,6 +116,14 @@ def num_outputs_total(self) -> Optional[int]:
             return self._cur_output_bundles
         return self._estimated_num_output_bundles
 
+    def num_output_rows_total(self) -> Optional[int]:
+        """Return the smaller of the limit and the input's estimated rows."""
+        upstream_rows = self.input_dependencies[0].num_output_rows_total()
+        if upstream_rows is not None:
+            return min(self._limit, upstream_rows)
+        # No upstream estimate is available, so no total can be reported.
+        return None
+
     def throttling_disabled(self) -> bool:
         return True
 

@@ -433,10 +433,6 @@ def num_active_tasks(self) -> int:
         #   to reflect the actual data processing tasks.
         return len(self._data_tasks)
 
-    @property
-    def estimated_output_num_rows(self) -> Optional[int]:
-        return getattr(self, "_estimated_output_num_rows", 0)
-
 
 def _map_task(
     map_transformer: MapTransformer,

@@ -74,6 +74,10 @@ def num_outputs_total(self) -> Optional[int]:
         # so we can return the number of blocks from the input op.
         return self.input_dependencies[0].num_outputs_total()
 
+    def num_output_rows_total(self) -> Optional[int]:
+        # Same as the first input's total row count (``None`` if it is unknown).
+        return self.input_dependencies[0].num_output_rows_total()
+
     def start(self, options: ExecutionOptions) -> None:
         super().start(options)
         # Force disable locality optimization.

@@ -55,6 +55,15 @@ def num_outputs_total(self) -> Optional[int]:
             num_outputs += input_num_outputs
         return num_outputs
 
+    def num_output_rows_total(self) -> Optional[int]:
+        """Sum the inputs' row estimates; ``None`` if any is unknown."""
+        row_counts = []
+        for upstream in self.input_dependencies:
+            row_counts.append(upstream.num_output_rows_total())
+            if row_counts[-1] is None:
+                return None  # Stop at the first input without an estimate.
+        return sum(row_counts)
+
     def _add_input_inner(self, refs: RefBundle, input_index: int) -> None:
         assert not self.completed()
         assert 0 <= input_index <= len(self._input_dependencies), input_index

@@ -53,6 +53,16 @@ def num_outputs_total(self) -> Optional[int]:
         else:
             return right_num_outputs
 
+    def num_output_rows_total(self) -> Optional[int]:
+        """Return the larger known input row estimate, or ``None`` if neither is."""
+        left = self.input_dependencies[0].num_output_rows_total()
+        right = self.input_dependencies[1].num_output_rows_total()
+        if left is None:
+            return right
+        if right is None:
+            return left
+        return max(left, right)
+
     def _add_input_inner(self, refs: RefBundle, input_index: int) -> None:
         assert not self.completed()
         assert input_index == 0 or input_index == 1, input_index

@@ -124,8 +124,10 @@ def execute(
 
         if not isinstance(dag, InputDataBuffer):
             # Note: DAG must be initialized in order to query num_outputs_total.
+            # TODO(zhilong): Implement num_output_rows_total for all
+            # AllToAllOperators
             self._global_info = ProgressBar(
-                "Running", dag.estimated_output_num_rows, unit="row"
+                "Running", dag.num_output_rows_total(), unit="row"
             )
 
         self._output_node: OpState = self._topology[dag]

@@ -210,7 +210,7 @@ def initialize_progress_bars(self, index: int, verbose_progress: bool) -> int:
         )
         self.progress_bar = ProgressBar(
             "- " + self.op.name,
-            self.op.estimated_output_num_rows,
+            self.op.num_output_rows_total(),
             unit="row",
             position=index,
             enabled=progress_bar_enabled,
@@ -242,8 +242,7 @@ def add_output(self, ref: RefBundle) -> None:
         self.outqueue.append(ref)
         self.num_completed_tasks += 1
         if self.progress_bar:
-            num_rows = sum(meta.num_rows for _, meta in ref.blocks)
-            self.progress_bar.update(num_rows, self.op.estimated_output_num_rows)
+            self.progress_bar.update(ref.num_rows(), self.op.num_output_rows_total())
 
     def refresh_progress_bar(self, resource_manager: ResourceManager) -> None:
         """Update the console with the latest operator progress."""

diff --git a/python/ray/data/_internal/progress_bar.py b/python/ray/data/_internal/progress_bar.py
@@ -3,6 +3,7 @@
 
 import ray
 from ray.experimental import tqdm_ray
+from ray.experimental.tqdm_ray import format_num
 from ray.types import ObjectRef
 from ray.util.annotations import Deprecated
 
@@ -119,16 +120,25 @@ def fetch_until_complete(self, refs: List[ObjectRef]) -> List[Any]:
     def set_description(self, name: str) -> None:
         if self._bar and name != self._desc:
             self._desc = name
-            self._bar.set_description(self._desc)
+            # Label format: "<name> <progress>/<total>"; unknown total is "??".
+            progress_text = format_num(self._progress)
+            bar_total = self._bar.total
+            total_text = "??" if bar_total is None else format_num(bar_total)
+            self._bar.set_description(
+                f"{self._desc} {progress_text}/{total_text}"
+            )
 
     def update(self, i: int = 0, total: Optional[int] = None) -> None:
         if self._bar and (i != 0 or self._bar.total != total):
             self._progress += i
             if total is not None:
                 self._bar.total = total
-            if self._bar.total is not None and self._progress > self._bar.total:
-                # If the progress goes over 100%, update the total.
-                self._bar.total = self._progress
+            formatted_progress = format_num(self._progress)
+            # Use the bar's stored total so a known total is never shown as "??".
+            bar_total = self._bar.total
+            formatted_total = "??" if bar_total is None else format_num(bar_total)
+            label = f"{self._desc} {formatted_progress}/{formatted_total}"
+            self._bar.set_description(label)
             self._bar.update(i)
 
     def close(self):

diff --git a/python/ray/experimental/tqdm_ray.py b/python/ray/experimental/tqdm_ray.py
@@ -51,6 +51,13 @@ def safe_print(*args, **kwargs):
         instance().unhide_bars()
 
 
+def format_num(n):
+    """Return the shorter of ``str(n)`` and its compact ``.3g`` rendering."""
+    plain = str(n)
+    compact = f"{n:.3g}".replace("e+0", "e+").replace("e-0", "e-")
+    return plain if len(plain) <= len(compact) else compact
+
+
 class tqdm:
     """Experimental: Ray distributed tqdm implementation.
 
@@ -99,7 +106,8 @@ def __init__(
 
     def set_description(self, desc):
         """Implements tqdm.tqdm.set_description."""
-        self._desc = desc
+        total = "??" if self._total is None else format_num(self._total)
+        self._desc = f"{desc} ({format_num(self._x)}/{total})"
         self._dump_state()
 
     def update(self, n=1):
@@ -139,11 +147,13 @@ def _dump_state(self, force_flush=False) -> None:
             instance().process_state_update(copy.deepcopy(self._get_state()))
 
     def _get_state(self) -> ProgressBarState:
+        """Get the formatted state of the progress bar."""
         return {
             "__magic_token__": RAY_TQDM_MAGIC,
             "x": self._x,
             "pos": self._pos,
-            "desc": self._desc,
+            "desc": f"{self._desc} {format_num(self._x)}\
+                /{format_num(self._total) if self._total else '??'}",
             "total": self._total,
             "unit": self._unit,
             "ip": self._ip,