[data] New executor [10/n]: Plumbing for locality_with_output and setting execution options #31908

Merged
merged 95 commits on Jan 26, 2023
95 commits
761a53a
streaming stuff only
ericl Jan 10, 2023
8404b37
add basic streaming
ericl Jan 11, 2023
124b9b5
integrate with feature flag
ericl Jan 11, 2023
78a89c3
fix stats
ericl Jan 11, 2023
34990e8
stats todo
ericl Jan 11, 2023
e0c3e04
wip refactor
ericl Jan 11, 2023
9117a85
fix it
ericl Jan 12, 2023
3cf4a6e
fix finalization
ericl Jan 12, 2023
b4147b5
add sanity test
ericl Jan 12, 2023
6c5f732
test completed flag
ericl Jan 12, 2023
11d0a7d
remove demo
ericl Jan 12, 2023
008eb13
disable by default
ericl Jan 12, 2023
7c4117b
fix legacy call
ericl Jan 12, 2023
75ed42d
off
ericl Jan 12, 2023
8901571
comments 1
ericl Jan 12, 2023
02a6043
Merge remote-tracking branch 'upstream/master' into streaming-executor
ericl Jan 12, 2023
528f87d
remove index from inputs done
ericl Jan 12, 2023
4a36f66
fix tests
ericl Jan 12, 2023
fe86e26
limit 1
ericl Jan 13, 2023
15e382f
wip
ericl Jan 13, 2023
e34273d
add lim
ericl Jan 13, 2023
ee2d105
validation code
ericl Jan 13, 2023
97ed2a0
Merge remote-tracking branch 'upstream/master' into streaming-executor
ericl Jan 13, 2023
4d56c68
Merge branch 'streaming-executor' into resource-limits
ericl Jan 13, 2023
9e36935
add resources
ericl Jan 13, 2023
ea64f22
canonicalize remote args
ericl Jan 13, 2023
d81c7c0
valid code
ericl Jan 13, 2023
481ce6a
todos
ericl Jan 13, 2023
67ae881
fix regression
ericl Jan 13, 2023
c16d265
Merge remote-tracking branch 'upstream/master' into streaming-executor
ericl Jan 13, 2023
159c2fa
Merge branch 'streaming-executor' into resource-limits
ericl Jan 13, 2023
3e954e9
remove stale docs
ericl Jan 13, 2023
ba8e530
comments 2
ericl Jan 15, 2023
31cfd08
remove unnecessary reverse
ericl Jan 15, 2023
7858cc2
Merge remote-tracking branch 'upstream/master' into streaming-executor
ericl Jan 17, 2023
1a66c03
Merge branch 'streaming-executor' into resource-limits
ericl Jan 17, 2023
3facdb3
add int math
ericl Jan 17, 2023
057db68
fix resource limits
ericl Jan 17, 2023
9d2e46a
add limits reporting
ericl Jan 18, 2023
0dd2459
add todo
ericl Jan 18, 2023
96b7202
update
ericl Jan 18, 2023
647ae29
test actor pool
ericl Jan 18, 2023
64909b5
add some todos
ericl Jan 18, 2023
8123f90
fix error message
ericl Jan 18, 2023
9fbd192
update limits
ericl Jan 18, 2023
6081848
1/4
ericl Jan 18, 2023
3c042b9
add tracing
ericl Jan 18, 2023
06e6f8d
trace step
ericl Jan 18, 2023
ed821d3
test unordered map
ericl Jan 18, 2023
75c688f
improve progress bar
ericl Jan 19, 2023
a9e4ed4
Merge remote-tracking branch 'upstream/master' into resource-limits
ericl Jan 19, 2023
3863a9c
refresh less
ericl Jan 19, 2023
5f7bb0d
todo
ericl Jan 19, 2023
9b8c453
restore limits
ericl Jan 19, 2023
257d6c5
refactor
ericl Jan 19, 2023
b9cec56
wip
ericl Jan 19, 2023
e0b550b
wip
ericl Jan 19, 2023
00c1cea
shutdown
ericl Jan 19, 2023
d37a0dc
wip
ericl Jan 19, 2023
bd5d2dc
wip
ericl Jan 19, 2023
29970f7
fix test operator
ericl Jan 19, 2023
ea18db1
fix exec test
ericl Jan 19, 2023
464bad3
test order preservation
ericl Jan 19, 2023
273ddb8
Merge branch 'streaming-interfaces' into resource-limits
ericl Jan 19, 2023
cdc7f5b
fix
ericl Jan 19, 2023
32377ac
wip
ericl Jan 19, 2023
5802753
fix test
ericl Jan 19, 2023
75a46e9
update test
ericl Jan 19, 2023
61ba793
Merge remote-tracking branch 'upstream/master' into streaming-interfaces
ericl Jan 20, 2023
dd2c073
deflake
ericl Jan 20, 2023
9c6cf2c
add progress str
ericl Jan 20, 2023
ed80f9a
remove s
ericl Jan 20, 2023
3871a0a
comments 3
ericl Jan 20, 2023
2416de0
Merge remote-tracking branch 'upstream/master' into streaming-interfaces
ericl Jan 21, 2023
4206fab
Merge branch 'streaming-interfaces' into resource-limits
ericl Jan 21, 2023
21311ce
cleanup
ericl Jan 23, 2023
2f2d4e3
Merge remote-tracking branch 'upstream/master' into resource-limits
ericl Jan 23, 2023
1434a19
update 2
ericl Jan 23, 2023
03ced27
fix tests
ericl Jan 23, 2023
82c740c
add basic tests
ericl Jan 23, 2023
9194c8f
wip
ericl Jan 23, 2023
5a92b13
update
ericl Jan 23, 2023
59f6695
Merge remote-tracking branch 'upstream/master' into resource-limits
ericl Jan 24, 2023
96e0c20
plumbing
ericl Jan 24, 2023
d2ccc49
check scheduling strat
ericl Jan 24, 2023
5f7ecbf
add spread strategy test
ericl Jan 24, 2023
9aed814
update
ericl Jan 24, 2023
c257365
Merge remote-tracking branch 'upstream/master' into resource-limits
ericl Jan 24, 2023
34911bd
remove broken test
ericl Jan 25, 2023
295af5d
lint
ericl Jan 25, 2023
ca933bc
fix
ericl Jan 25, 2023
5f6ed2c
Merge remote-tracking branch 'upstream/master' into resource-limits
ericl Jan 25, 2023
b77d577
Merge branch 'resource-limits' into plumbing2
ericl Jan 25, 2023
aa0b4bd
fix test
ericl Jan 25, 2023
67ff6c9
fix lint
ericl Jan 25, 2023
@@ -3,6 +3,9 @@
from ray.data.block import Block, BlockMetadata
from ray.data.context import DatasetContext
from ray.data.context import DEFAULT_SCHEDULING_STRATEGY
from ray.data._internal.execution.interfaces import (
ExecutionOptions,
)
from ray.data._internal.execution.operators.map_task_submitter import (
MapTaskSubmitter,
_map_task,
@@ -28,8 +31,7 @@ def __init__(
ray_remote_args: Remote arguments for the Ray actors to be created.
pool_size: The size of the actor pool.
"""
self._transform_fn_ref = transform_fn_ref
self._ray_remote_args = ray_remote_args
super().__init__(transform_fn_ref, ray_remote_args)
self._pool_size = pool_size
# A map from task output futures to the actors on which they are running.
self._active_actors: Dict[ObjectRef[Block], ray.actor.ActorHandle] = {}
@@ -39,7 +41,8 @@ def __init__(
def progress_str(self) -> str:
return f"{self._actor_pool.size()} actors"

def start(self):
def start(self, options: ExecutionOptions):
super().start(options)
# Create the actor workers and add them to the pool.
ray_remote_args = self._apply_default_remote_args(self._ray_remote_args)
cls_ = ray.remote(**ray_remote_args)(MapWorker)
@@ -76,7 +76,7 @@ def __init__(
self._output_queue: Optional[_OutputQueue] = None

def start(self, options: ExecutionOptions) -> None:
self._task_submitter.start()
self._task_submitter.start(options)
if options.preserve_order:
self._output_queue = _OrderedOutputQueue()
else:
@@ -1,7 +1,13 @@
from abc import ABC, abstractmethod
from typing import List, Union, Tuple, Callable, Iterator
from typing import Dict, Any, List, Union, Tuple, Callable, Iterator

import ray
from ray.data.block import Block, BlockAccessor, BlockMetadata, BlockExecStats
from ray.data._internal.execution.interfaces import (
ExecutionOptions,
)
from ray.types import ObjectRef
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
from ray._raylet import ObjectRefGenerator


@@ -13,14 +19,35 @@ class MapTaskSubmitter(ABC):
submission is done.
"""

def start(self):
def __init__(
self,
transform_fn_ref: ObjectRef[Callable[[Iterator[Block]], Iterator[Block]]],
ray_remote_args: Dict[str, Any],
):
"""Create a TaskPoolSubmitter instance.

Args:
transform_fn_ref: The function to apply to a block bundle in the submitted
map task.
ray_remote_args: Remote arguments for the Ray tasks to be launched.
"""
self._transform_fn_ref = transform_fn_ref
self._ray_remote_args = ray_remote_args

def start(self, options: ExecutionOptions):
"""Start the task submitter so it's ready to submit tasks.

This is called when execution of the map operator actually starts, and is where
the submitter can initialize expensive state, reserve resources, start workers,
etc.
"""
pass
if options.locality_with_output:
self._ray_remote_args[
"scheduling_strategy"
] = NodeAffinitySchedulingStrategy(
ray.get_runtime_context().get_node_id(),
soft=True,
)

@abstractmethod
def submit(
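The `start()` hook above rewrites the submitter's remote args only when `locality_with_output` is enabled. Below is a minimal, ray-free sketch of that behavior; `ExecutionOptions` and `NodeAffinitySchedulingStrategy` here are simplified stand-ins for the real Ray classes, and `current_node_id` is passed in explicitly instead of being read from `ray.get_runtime_context()`.

```python
from dataclasses import dataclass
from typing import Any, Dict


@dataclass
class ExecutionOptions:
    # Mirrors the option plumbed through in this PR.
    locality_with_output: bool = False


@dataclass
class NodeAffinitySchedulingStrategy:
    # Stand-in for ray.util.scheduling_strategies.NodeAffinitySchedulingStrategy.
    node_id: str
    soft: bool = True


class MapTaskSubmitter:
    def __init__(self, ray_remote_args: Dict[str, Any]):
        self._ray_remote_args = ray_remote_args

    def start(self, options: ExecutionOptions, current_node_id: str) -> None:
        # When locality_with_output is requested, add a soft affinity for
        # the driver's node, so tasks prefer (but are not forced) to run
        # where their output will be consumed.
        if options.locality_with_output:
            self._ray_remote_args["scheduling_strategy"] = (
                NodeAffinitySchedulingStrategy(current_node_id, soft=True)
            )


submitter = MapTaskSubmitter({"num_cpus": 1})
submitter.start(ExecutionOptions(locality_with_output=True), "node-1")
```

Using a soft affinity keeps the scheduler free to place tasks elsewhere when the preferred node is saturated.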
@@ -1,4 +1,4 @@
from typing import Dict, Any, Iterator, Callable, Union, List
from typing import Union, List

import ray
from ray.data.block import Block
@@ -14,21 +14,6 @@
class TaskPoolSubmitter(MapTaskSubmitter):
"""A task submitter for MapOperator that uses normal Ray tasks."""

def __init__(
self,
transform_fn_ref: ObjectRef[Callable[[Iterator[Block]], Iterator[Block]]],
ray_remote_args: Dict[str, Any],
):
"""Create a TaskPoolSubmitter instance.

Args:
transform_fn_ref: The function to apply to a block bundle in the submitted
map task.
ray_remote_args: Remote arguments for the Ray tasks to be launched.
"""
self._transform_fn_ref = transform_fn_ref
self._ray_remote_args = ray_remote_args

def submit(
self, input_blocks: List[ObjectRef[Block]]
) -> ObjectRef[ObjectRefGenerator]:
6 changes: 2 additions & 4 deletions python/ray/data/_internal/plan.py
@@ -449,12 +449,11 @@ def execute_to_iterator(
)

from ray.data._internal.execution.streaming_executor import StreamingExecutor
from ray.data._internal.execution.interfaces import ExecutionOptions
from ray.data._internal.execution.legacy_compat import (
execute_to_legacy_block_iterator,
)

executor = StreamingExecutor(ExecutionOptions(preserve_order=False))
executor = StreamingExecutor(copy.deepcopy(ctx.execution_options))
Contributor:
Given execution_options could be mutated in StreamingExecutor/BulkExecutor, shall we just provide a method DatasetContext.get_execution_options() that returns a deep copy of ctx.execution_options?

Contributor (author):
Missed this comment; I'll file a followup.
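The helper the reviewer suggests is not part of this PR. A sketch of what a hypothetical `DatasetContext.get_execution_options()` could look like, with simplified stand-ins for the real `DatasetContext`, `ExecutionOptions`, and `ExecutionResources` classes:

```python
import copy
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class ExecutionResources:
    cpu: Optional[float] = None
    gpu: Optional[float] = None


@dataclass
class ExecutionOptions:
    preserve_order: bool = False
    locality_with_output: bool = False
    resource_limits: ExecutionResources = field(default_factory=ExecutionResources)


class DatasetContext:
    """Simplified stand-in for ray.data.context.DatasetContext."""

    _instance = None

    def __init__(self) -> None:
        self.execution_options = ExecutionOptions()

    @classmethod
    def get_current(cls) -> "DatasetContext":
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def get_execution_options(self) -> ExecutionOptions:
        # Hand each executor its own copy so in-executor mutation cannot
        # leak back into the shared context singleton.
        return copy.deepcopy(self.execution_options)


ctx = DatasetContext.get_current()
opts = ctx.get_execution_options()
opts.resource_limits.cpu = 1  # mutates only the executor's copy
```

Centralizing the deep copy in one method would replace the `copy.deepcopy(...)` calls scattered at each executor construction site in plan.py.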

block_iter = execute_to_legacy_block_iterator(
executor,
self,
@@ -500,12 +499,11 @@ def execute(
if not self.has_computed_output():
if self._run_with_new_execution_backend():
from ray.data._internal.execution.bulk_executor import BulkExecutor
from ray.data._internal.execution.interfaces import ExecutionOptions
from ray.data._internal.execution.legacy_compat import (
execute_to_legacy_block_list,
)

executor = BulkExecutor(ExecutionOptions())
executor = BulkExecutor(copy.deepcopy(context.execution_options))
blocks = execute_to_legacy_block_list(
executor,
self,
11 changes: 10 additions & 1 deletion python/ray/data/context.py
@@ -1,10 +1,13 @@
import os
import threading
from typing import Optional
from typing import Optional, TYPE_CHECKING

from ray.util.annotations import DeveloperAPI
from ray.util.scheduling_strategies import SchedulingStrategyT

if TYPE_CHECKING:
from ray.data._internal.execution.interfaces import ExecutionOptions

# The context singleton on this process.
_default_context: "Optional[DatasetContext]" = None
_context_lock = threading.Lock()
@@ -144,6 +147,7 @@ def __init__(
enable_auto_log_stats: bool,
trace_allocations: bool,
optimizer_enabled: bool,
execution_options: "ExecutionOptions",
):
"""Private constructor (use get_current() instead)."""
self.block_splitting_enabled = block_splitting_enabled
@@ -171,6 +175,8 @@ def __init__(
self.enable_auto_log_stats = enable_auto_log_stats
self.trace_allocations = trace_allocations
self.optimizer_enabled = optimizer_enabled
# TODO: expose execution options in Dataset public APIs.
Contributor:
It'd be better to create a tracking issue.

Contributor (author):
Added to #31797

self.execution_options = execution_options

@staticmethod
def get_current() -> "DatasetContext":
@@ -179,6 +185,8 @@ def get_current() -> "DatasetContext":
If the context has not yet been created in this process, it will be
initialized with default settings.
"""
from ray.data._internal.execution.interfaces import ExecutionOptions

global _default_context

with _context_lock:
@@ -213,6 +221,7 @@ def get_current() -> "DatasetContext":
enable_auto_log_stats=DEFAULT_AUTO_LOG_STATS,
trace_allocations=DEFAULT_TRACE_ALLOCATIONS,
optimizer_enabled=DEFAULT_OPTIMIZER_ENABLED,
execution_options=ExecutionOptions(),
)

return _default_context
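`get_current()` above imports `ExecutionOptions` inside the function body rather than at module scope, avoiding a circular import between context.py and the execution interfaces, and constructs the context lazily under a lock. A stripped-down model of that lazy-singleton pattern (class and field names simplified):

```python
import threading
from typing import Any, Dict, Optional

# Module-level singleton slot and lock, mirroring _default_context and
# _context_lock in context.py.
_default_context: Optional["Context"] = None
_context_lock = threading.Lock()


class Context:
    """Simplified stand-in for DatasetContext."""

    def __init__(self, execution_options: Dict[str, Any]):
        self.execution_options = execution_options

    @staticmethod
    def get_current() -> "Context":
        global _default_context
        # Create the process-wide singleton on first use; the lock makes
        # concurrent first calls from multiple threads safe.
        with _context_lock:
            if _default_context is None:
                _default_context = Context(execution_options={})
            return _default_context
```

Every caller of `Context.get_current()` sees the same object, which is why the PR deep-copies `execution_options` before handing it to an executor.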
71 changes: 71 additions & 0 deletions python/ray/data/tests/test_streaming_executor.py
@@ -5,6 +5,7 @@
from typing import List, Any

import ray
from ray.data.context import DatasetContext
from ray.data._internal.execution.interfaces import (
ExecutionOptions,
ExecutionResources,
@@ -27,6 +28,7 @@
from ray.data._internal.execution.operators.map_operator import MapOperator
from ray.data._internal.execution.operators.input_data_buffer import InputDataBuffer
from ray.data._internal.execution.util import make_ref_bundles
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy


@ray.remote
@@ -291,6 +293,75 @@ def reverse_sort(inputs: List[RefBundle]):
assert output == expected, (output, expected)


def test_e2e_option_propagation():
DatasetContext.get_current().new_execution_backend = True
DatasetContext.get_current().use_streaming_executor = True

def run():
ray.data.range(5, parallelism=5).map(
lambda x: x, compute=ray.data.ActorPoolStrategy(2, 2)
).take_all()

DatasetContext.get_current().execution_options.resource_limits = (
ExecutionResources()
)
run()

DatasetContext.get_current().execution_options.resource_limits.cpu = 1
with pytest.raises(ValueError):
run()


def test_configure_spread_e2e():
from ray import remote_function

tasks = []

def _test_hook(fn, args, strategy):
if "map_task" in str(fn):
tasks.append(strategy)

remote_function._task_launch_hook = _test_hook
DatasetContext.get_current().use_streaming_executor = True
DatasetContext.get_current().execution_options.preserve_order = True

# Simple 2-stage pipeline.
ray.data.range(2, parallelism=2).map(lambda x: x, num_cpus=2).take_all()

# Read tasks get SPREAD by default, subsequent ones use default policy.
tasks = sorted(tasks)
assert tasks == ["DEFAULT", "DEFAULT", "SPREAD", "SPREAD"]


def test_configure_output_locality():
inputs = make_ref_bundles([[x] for x in range(20)])
o1 = InputDataBuffer(inputs)
o2 = MapOperator(make_transform(lambda block: [b * -1 for b in block]), o1)
o3 = MapOperator(
make_transform(lambda block: [b * 2 for b in block]),
o2,
compute_strategy=ray.data.ActorPoolStrategy(1, 1),
)
topo, _ = build_streaming_topology(o3, ExecutionOptions(locality_with_output=False))
assert (
o2._execution_state._task_submitter._ray_remote_args.get("scheduling_strategy")
is None
)
assert (
o3._execution_state._task_submitter._ray_remote_args.get("scheduling_strategy")
is None
)
topo, _ = build_streaming_topology(o3, ExecutionOptions(locality_with_output=True))
assert isinstance(
o2._execution_state._task_submitter._ray_remote_args["scheduling_strategy"],
NodeAffinitySchedulingStrategy,
)
assert isinstance(
o3._execution_state._task_submitter._ray_remote_args["scheduling_strategy"],
NodeAffinitySchedulingStrategy,
)


if __name__ == "__main__":
import sys
