
Commit b375907

Daraan, gemini-code-assist[bot], and kamil-kaczmarek authored
[RLlib] Overhaul of the typing module & better device typing (#55291)
Resolves: #55288 (wrong `np.array` in `TensorType`)

Furthermore changes:

- Changed comments to (semi-)docstrings, which IDEs (e.g. VSCode + Pylance) display as tooltips, making that information available to the user.
- `AgentID: Any -> Hashable`, as it is used for dict keys.
- Changed `DeviceType` to no longer be a `TypeVar` (that makes no sense in the way it is currently used); it now also includes `DeviceLikeType` (`int | str | device`) from `torch`. IMO it could fully replace the current type, but to be defensive I only added it as an extra possible type.
- Used the updated `DeviceType` to improve the type of `Runner._device` and make it more correct.
- Used torch's own type in `data`; the current code supports more than just `str`. I refrained from adding a reference to `rllib`, although it would be nice if they were in sync.
- Some extra formatting that is forced by pre-commit.

---

> [!NOTE]
> Revamps `rllib.utils.typing` (NDArray-based `TensorType`, broader `DeviceType`, `AgentID` as `Hashable`, docstring cleanups) and updates call sites to use optional device typing and improved hints.
>
> - **Types**:
>   - Overhaul `rllib/utils/typing.py`:
>     - `TensorType` now uses `numpy.typing.NDArray`; heavy use of `TYPE_CHECKING` to avoid runtime deps on torch/tf/jax.
>     - `DeviceType` widened to `Union[str, torch.device, int]` (was a `TypeVar`).
>     - `AgentID` tightened to `Hashable`; `NetworkType` uses `keras.Model`.
>     - Refined aliases (e.g., `FromConfigSpec`, `SpaceStruct`) and added concise docstrings.
> - **Runners**:
>   - `Runner._device` is now `Optional` (`Union[DeviceType, None]`) with an updated docstring; same change in the offline runners' `_device` properties.
> - **Connectors**:
>   - `NumpyToTensor`: `device` param typed as `Optional[DeviceType]` (via `TYPE_CHECKING`).
> - **Utils**:
>   - `from_config`: typed `config: Optional[FromConfigSpec]` with a `TYPE_CHECKING` import.
> - **Misc**:
>   - Minor formatting/import ordering and comment typo fixes.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit ae2e422.</sup>

---

Signed-off-by: Daniel Sperber <github.blurry@9ox.net>
Signed-off-by: Daraan <github.blurry@9ox.net>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Kamil Kaczmarek <kaczmarek.poczta@gmail.com>
Co-authored-by: Kamil Kaczmarek <kamil@anyscale.com>
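To make the typing changes concrete, below is a rough, hypothetical sketch of what the revised aliases could look like. It is not the actual content of `rllib/utils/typing.py` (presumably the seventh changed file, whose diff is not rendered below), and it omits the TensorFlow/JAX branches.

```python
# Hypothetical sketch only -- not the real rllib/utils/typing.py, which defines
# many more aliases and also covers TensorFlow and JAX tensors.
from typing import TYPE_CHECKING, Any, Hashable, Union

from numpy.typing import NDArray

if TYPE_CHECKING:
    # Heavy frameworks are imported for type checking only, never at runtime.
    import torch

# A tensor is either a plain NumPy array or a framework tensor.
TensorType = Union[NDArray[Any], "torch.Tensor"]

# Anything torch accepts as a device: a string ("cuda:0"), a torch.device
# object, or a bare device index.
DeviceType = Union[str, "torch.device", int]

# Agent IDs are used as dict keys, so they only need to be hashable.
AgentID = Hashable
```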
1 parent 27a6994 commit b375907

File tree

7 files changed: +204, -122 lines changed


rllib/connectors/common/numpy_to_tensor.py

Lines changed: 5 additions & 2 deletions
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import gymnasium as gym
 
@@ -12,6 +12,9 @@
 from ray.rllib.utils.typing import EpisodeType
 from ray.util.annotations import PublicAPI
 
+if TYPE_CHECKING:
+    from ray.rllib.utils.typing import DeviceType
+
 
 @PublicAPI(stability="alpha")
 class NumpyToTensor(ConnectorV2):
@@ -59,7 +62,7 @@ def __init__(
         input_action_space: Optional[gym.Space] = None,
         *,
         pin_memory: bool = False,
-        device: Optional[str] = None,
+        device: Optional["DeviceType"] = None,
         **kwargs,
     ):
         """Initializes a NumpyToTensor instance.

rllib/core/learner/differentiable_learner.py

Lines changed: 6 additions & 5 deletions
@@ -1,17 +1,18 @@
 import abc
 import logging
-import numpy
 from typing import (
+    TYPE_CHECKING,
     Any,
     Collection,
     Dict,
     Iterable,
     Optional,
     Tuple,
-    TYPE_CHECKING,
     Union,
 )
 
+import numpy
+
 from ray.rllib.connectors.learner.learner_connector_pipeline import (
     LearnerConnectorPipeline,
 )
@@ -22,19 +23,19 @@
 from ray.rllib.policy.sample_batch import MultiAgentBatch, SampleBatch
 from ray.rllib.utils import unflatten_dict
 from ray.rllib.utils.annotations import (
-    override,
     OverrideToImplementCustomLogic,
     OverrideToImplementCustomLogic_CallToSuperRecommended,
+    override,
 )
 from ray.rllib.utils.checkpoints import Checkpointable
 from ray.rllib.utils.metrics import (
     DATASET_NUM_ITERS_TRAINED,
     DATASET_NUM_ITERS_TRAINED_LIFETIME,
+    MODULE_TRAIN_BATCH_SIZE_MEAN,
     NUM_ENV_STEPS_TRAINED,
     NUM_ENV_STEPS_TRAINED_LIFETIME,
     NUM_MODULE_STEPS_TRAINED,
     NUM_MODULE_STEPS_TRAINED_LIFETIME,
-    MODULE_TRAIN_BATCH_SIZE_MEAN,
     WEIGHTS_SEQ_NO,
 )
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
@@ -124,7 +125,7 @@ def build(self, device: Optional[DeviceType] = None) -> None:
         if self._is_built:
             logger.debug("DifferentiableLearner already built. Skipping built.")
 
-        # If a dvice was passed, set the `DifferentiableLearner`'s device.
+        # If a device was passed, set the `DifferentiableLearner`'s device.
         if device:
             self._device = device
 
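Apart from import reordering, this hunk only fixes a comment typo, but the surrounding logic is the useful bit: `build()` overrides the stored device only when one is passed explicitly. A standalone sketch of that pattern with hypothetical names (not RLlib code):

```python
from typing import Optional, Union

# Hypothetical stand-in for rllib's DeviceType alias.
DeviceType = Union[str, int]


class TinyLearner:
    """Minimal illustration of the device-override pattern in build()."""

    def __init__(self, default_device: DeviceType = "cpu") -> None:
        self._device: DeviceType = default_device
        self._is_built = False

    def build(self, device: Optional[DeviceType] = None) -> None:
        # If a device was passed, set the learner's device; otherwise keep it.
        if device:
            self._device = device
        self._is_built = True


learner = TinyLearner()
learner.build(device="cuda:0")
assert learner._device == "cuda:0"
```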
rllib/offline/offline_evaluation_runner.py

Lines changed: 6 additions & 4 deletions
@@ -389,9 +389,11 @@ def set_device(self):
         try:
             self.__device = get_device(
                 self.config,
-                0
-                if not self.worker_index
-                else self.config.num_gpus_per_offline_eval_runner,
+                (
+                    0
+                    if not self.worker_index
+                    else self.config.num_gpus_per_offline_eval_runner
+                ),
             )
         except NotImplementedError:
             self.__device = None
@@ -456,7 +458,7 @@ def _batch_iterator(self) -> MiniBatchRayDataIterator:
         return self.__batch_iterator
 
     @property
-    def _device(self) -> DeviceType:
+    def _device(self) -> Union[DeviceType, None]:
         return self.__device
 
     @property
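Both hunks here are type/format-only. The first wraps the conditional expression passed to `get_device()` in parentheses, which changes layout but not behavior; the second widens the `_device` return type to allow `None`. A standalone sketch of the layout equivalence (hypothetical helper, not the runner's actual code):

```python
def pick_gpu_count(worker_index: int, num_gpus_per_runner: int) -> int:
    """Hypothetical helper mirroring the argument passed to get_device()."""
    # Old layout: the conditional expression spread directly across lines.
    old_style = 0 if not worker_index else num_gpus_per_runner
    # New layout: the same expression, just wrapped in parentheses.
    new_style = (
        0
        if not worker_index
        else num_gpus_per_runner
    )
    assert old_style == new_style
    return new_style


print(pick_gpu_count(worker_index=0, num_gpus_per_runner=1))  # 0 (index-0 runner)
print(pick_gpu_count(worker_index=2, num_gpus_per_runner=1))  # 1
```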

rllib/offline/offline_policy_evaluation_runner.py

Lines changed: 18 additions & 12 deletions
@@ -102,9 +102,11 @@ def __call__(self, batch: Dict[str, numpy.ndarray]) -> Dict[str, numpy.ndarray]:
         # TODO (simon): Refactor into a single code block for both cases.
         episodes = self.episode_buffer.sample(
             num_items=self.config.train_batch_size_per_learner,
-            batch_length_T=self.config.model_config.get("max_seq_len", 0)
-            if self._module.is_stateful()
-            else None,
+            batch_length_T=(
+                self.config.model_config.get("max_seq_len", 0)
+                if self._module.is_stateful()
+                else None
+            ),
             n_step=self.config.get("n_step", 1) or 1,
             # TODO (simon): This can be removed as soon as DreamerV3 has been
             # cleaned up, i.e. can use episode samples for training.
@@ -131,9 +133,11 @@ def __call__(self, batch: Dict[str, numpy.ndarray]) -> Dict[str, numpy.ndarray]:
         # Sample steps from the buffer.
         episodes = self.episode_buffer.sample(
             num_items=self.config.train_batch_size_per_learner,
-            batch_length_T=self.config.model_config.get("max_seq_len", 0)
-            if self._module.is_stateful()
-            else None,
+            batch_length_T=(
+                self.config.model_config.get("max_seq_len", 0)
+                if self._module.is_stateful()
+                else None
+            ),
             n_step=self.config.get("n_step", 1) or 1,
             # TODO (simon): This can be removed as soon as DreamerV3 has been
             # cleaned up, i.e. can use episode samples for training.
@@ -241,14 +245,14 @@ def _create_batch_iterator(self, **kwargs) -> Iterable:
         # Define the collate function that converts the flattened dictionary
         # to a `MultiAgentBatch` with Tensors.
         def _collate_fn(
-            _batch: Dict[str, numpy.ndarray]
+            _batch: Dict[str, numpy.ndarray],
         ) -> Dict[EpisodeID, Dict[str, numpy.ndarray]]:
 
             return _batch["episodes"]
 
         # Define the finalize function that makes the host-to-device transfer.
         def _finalize_fn(
-            _batch: Dict[EpisodeID, Dict[str, numpy.ndarray]]
+            _batch: Dict[EpisodeID, Dict[str, numpy.ndarray]],
         ) -> Dict[EpisodeID, Dict[str, TensorType]]:
 
             return [
@@ -556,9 +560,11 @@ def set_device(self):
         try:
             self.__device = get_device(
                 self.config,
-                0
-                if not self.worker_index
-                else self.config.num_gpus_per_offline_eval_runner,
+                (
+                    0
+                    if not self.worker_index
+                    else self.config.num_gpus_per_offline_eval_runner
+                ),
             )
         except NotImplementedError:
             self.__device = None
@@ -613,7 +619,7 @@ def _batch_iterator(self) -> MiniBatchRayDataIterator:
         return self.__batch_iterator
 
     @property
-    def _device(self) -> DeviceType:
+    def _device(self) -> Union[DeviceType, None]:
         return self.__device
 
     @property
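The remaining change in this file is the trailing comma added after the single parameter of `_collate_fn` and `_finalize_fn`, presumably enforced by the repo's pre-commit formatter; it has no runtime effect. A minimal standalone sketch with a hypothetical function name:

```python
from typing import Dict

import numpy


# With the trailing comma, formatters such as Black keep the exploded,
# one-parameter-per-line signature; without it they may collapse the line.
# Behavior is identical either way.
def _collate_example(
    _batch: Dict[str, numpy.ndarray],
) -> Dict[str, numpy.ndarray]:
    return _batch


print(_collate_example({"obs": numpy.zeros(3)}))
```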

rllib/utils/from_config.py

Lines changed: 6 additions & 1 deletion
@@ -10,9 +10,14 @@
 from ray.rllib.utils import force_list, merge_dicts
 from ray.rllib.utils.annotations import DeveloperAPI
 
+from typing import Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ray.rllib.utils.typing import FromConfigSpec
+
 
 @DeveloperAPI
-def from_config(cls, config=None, **kwargs):
+def from_config(cls, config: Optional["FromConfigSpec"] = None, **kwargs):
     """Uses the given config to create an object.
 
     If `config` is a dict, an optional "type" key can be used as a
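The pattern used here, importing the alias only under `TYPE_CHECKING` and referencing it as a string in the annotation, gives type checkers the full `FromConfigSpec` type without any runtime import cost or circular-import risk. A generic, self-contained sketch of the same pattern with hypothetical module and names:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen only by static type checkers (mypy, Pylance, ...); never executed
    # at runtime, so there is no import cost and no circular-import risk.
    from some_heavy_module import HeavyConfigSpec  # hypothetical module/type


def build_thing(config: Optional["HeavyConfigSpec"] = None, **kwargs):
    # At runtime the annotation is just the string "HeavyConfigSpec", so the
    # function works even though some_heavy_module was never imported.
    config = dict(config or {}, **kwargs)
    return config


print(build_thing({"type": "MyClass"}, lr=0.001))
```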

rllib/utils/runners/runner.py

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,7 @@
 import abc
 import logging
-from typing import TYPE_CHECKING, Any
+
+from typing import TYPE_CHECKING, Any, Union
 
 from ray.rllib.utils.actor_manager import FaultAwareApply
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
@@ -86,8 +87,8 @@ def stop(self) -> None:
 
     @property
     @abc.abstractmethod
-    def _device(self) -> DeviceType:
-        """Returns the device of this `Runner`."""
+    def _device(self) -> Union[DeviceType, None]:
+        """Returns the device of this `Runner`. None if framework is not supported."""
         pass
 
     @abc.abstractmethod
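With `_device` now returning `Union[DeviceType, None]`, concrete runners return `None` when no framework-specific device can be resolved (as the offline runners above do after catching `NotImplementedError`). A hypothetical standalone sketch of that contract, not an actual RLlib runner:

```python
from typing import Optional, Union

# Hypothetical stand-in for ray.rllib.utils.typing.DeviceType.
DeviceType = Union[str, int]


class MyRunner:
    """Illustrative runner honoring the Optional-device contract."""

    def __init__(self) -> None:
        self.__device: Optional[DeviceType] = None
        try:
            self.__device = self._resolve_device()
        except NotImplementedError:
            # Framework without device support: leave the device as None.
            self.__device = None

    def _resolve_device(self) -> DeviceType:
        # Hypothetical resolution logic; a real runner would call get_device().
        return "cpu"

    @property
    def _device(self) -> Union[DeviceType, None]:
        """Returns the device of this runner. None if framework is not supported."""
        return self.__device


print(MyRunner()._device)  # "cpu"
```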
