
Commit 305969b

Authored by pseudo-rnd-thoughts (Mark Towers) and co-authors
[rllib] Re-enable all RLlib doctests (#58974)
## Description

The RLlib team is working on improving our testing position. Currently, several files are excluded from our doctests. This PR re-enables doctest coverage for the whole project.

---------

Signed-off-by: Mark Towers <mark@anyscale.com>
Signed-off-by: Mark Towers <mark.m.towers@gmail.com>
Co-authored-by: Mark Towers <mark@anyscale.com>
1 parent 11d7ad4 commit 305969b
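
For context, the doctests in question are Sphinx `.. testcode::` blocks embedded in RLlib docstrings; the suite imports each module and executes every block, so a stale import or broken API reference fails CI. A minimal sketch of the pattern (hypothetical example, not taken from this PR):

"""Hypothetical RLlib-style docstring with an executable example.

.. testcode::

    # Run by the doctest suite: the module must import cleanly and this
    # block must execute without raising.
    total = sum(range(5))
    print(total)

.. testoutput::

    10
"""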

File tree: 5 files changed (+27, -41 lines)

rllib/BUILD.bazel

Lines changed: 2 additions & 28 deletions
@@ -72,36 +72,10 @@ doctest(
             "**/examples/**",
             "**/tests/**",
             "**/test_*.py",
-            # Deprecated modules
-            "utils/memory.py",
+            # Deprecated stub files that raise ValueError on import.
             "offline/off_policy_estimator.py",
             "offline/estimators/feature_importance.py",
-            # Missing imports
-            "algorithms/dreamerv3/**",
-            # FIXME: These modules contain broken examples that weren't previously
-            # tested.
-            "algorithms/algorithm_config.py",
-            "core/distribution/torch/torch_distribution.py",
-            "core/models/base.py",
-            "core/models/specs/specs_base.py",
-            "core/models/specs/specs_dict.py",
-            "env/wrappers/pettingzoo_env.py",
-            "evaluation/collectors/sample_collector.py",
-            "evaluation/metrics.py",
-            "evaluation/observation_function.py",
-            "evaluation/postprocessing.py",
-            "execution/buffers/mixin_replay_buffer.py",
-            "models/catalog.py",
-            "models/preprocessors.py",
-            "models/repeated_values.py",
-            "policy/rnn_sequencing.py",
-            "utils/actor_manager.py",
-            "utils/filter.py",
-            "utils/from_config.py",
-            "utils/metrics/window_stat.py",
-            "utils/pre_checks/env.py",
-            "utils/replay_buffers/multi_agent_mixin_replay_buffer.py",
-            "utils/spaces/space_utils.py",
+            "utils/memory.py",
         ],
     ),
     tags = ["team:rllib"],
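
The exclusion list shrinks from roughly thirty entries to three: `utils/memory.py` and the two `offline` files, which are deprecated stubs that raise `ValueError` as soon as they are imported and therefore can never be doctested. A sketch of that stub pattern (illustrative only; the real files go through RLlib's deprecation helpers):

# Sketch of a deprecated stub module such as rllib/utils/memory.py.
# Importing it raises immediately, so the doctest runner must skip the file.
raise ValueError(
    "This module was deprecated and removed; see the RLlib migration "
    "notes for its replacement."
)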

rllib/algorithms/algorithm_config.py

Lines changed: 11 additions & 6 deletions
@@ -111,7 +111,7 @@ class AlgorithmConfig(_Config):
     .. testcode::

         from ray.rllib.algorithms.ppo import PPOConfig
-        from ray.rllib.algorithms.callbacks import MemoryTrackingCallbacks
+        from ray.rllib.callbacks.callbacks import MemoryTrackingCallbacks
         # Construct a generic config object, specifying values within different
         # sub-categories, e.g. "training".
         config = (

@@ -6134,7 +6134,13 @@ class DifferentiableAlgorithmConfig(AlgorithmConfig):

     .. testcode::

-        from ray.rllib.algorithm.algorithm_config import DifferentiableAlgorithmConfig
+        from ray.rllib.algorithms.algorithm_config import DifferentiableAlgorithmConfig
+        from ray.rllib.core.learner.differentiable_learner_config import (
+            DifferentiableLearnerConfig,
+        )
+        from ray.rllib.core.learner.torch.torch_differentiable_learner import (
+            TorchDifferentiableLearner,
+        )
         # Construct a generic config for an algorithm that needs differentiable Learners.
         config = (
             DifferentiableAlgorithmConfig()

@@ -6143,15 +6149,14 @@ class DifferentiableAlgorithmConfig(AlgorithmConfig):
             .learners(
                 differentiable_learner_configs=[
                     DifferentiableLearnerConfig(
-                        DifferentiableTorchLearner,
+                        TorchDifferentiableLearner,
                         lr=1e-4,
                     )
                 ]
             )
         )
-        # Similar to `AlgorithmConfig` the config using differentiable Learners can be
-        # used to build a respective `Algorithm`.
-        algo = config.build()
+        # The config is then used to configure a MetaLearner, see
+        # `rllib/examples/algorithms/maml_lr_supervised_learning.py` for a full example.


    """

rllib/algorithms/dreamerv3/dreamerv3.py

Lines changed: 0 additions & 5 deletions
@@ -83,11 +83,6 @@ class DreamerV3Config(AlgorithmConfig):
         algo = config.build()
         # algo.train()
         del algo
-
-    .. testoutput::
-        :hide:
-
-        ...
     """

     def __init__(self, algo_class=None):
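
Note that `algorithms/dreamerv3/**` also came off the BUILD exclusion list, so this docstring now runs under the doctest suite. A sketch of what the surviving example plausibly looks like end to end (the construction of `config` sits above line 83 and is outside the hunk; the environment choice is an assumption):

from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config

# Assumed setup; the actual docstring lines above this hunk are not shown.
config = DreamerV3Config().environment("CartPole-v1")

algo = config.build()
# algo.train()  # Kept commented out so the doctest stays fast.
del algo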

rllib/core/models/base.py

Lines changed: 12 additions & 0 deletions
@@ -41,6 +41,12 @@ def __init__(self, config):
     def _forward(self, input_dict):
         return input_dict["obs"] * self.my_param

+    def get_num_parameters(self):
+        return (0, 0)
+
+    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
+        pass
+

 @dataclass
 class MyModelConfig(ModelConfig):

@@ -244,6 +250,12 @@ def _forward(self, input_dict, **kwargs):
             ),
         }

+    def get_num_parameters(self):
+        return (0, 0)
+
+    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
+        pass
+
 @dataclass
 class NumpyEncoderConfig(ModelConfig):
     factor: int = None
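
The two added stubs suggest that RLlib's `Model` base class declares `get_num_parameters()` and `_set_to_dummy_weights()` as abstract, so the docstring's example models must implement them to be instantiable (an inference from the diff, not stated in it). A minimal self-contained sketch of the same pattern, with a stand-in for the real interface:

from abc import ABC, abstractmethod


class Model(ABC):
    """Stand-in for RLlib's Model interface (assumed shape)."""

    @abstractmethod
    def _forward(self, input_dict):
        ...

    @abstractmethod
    def get_num_parameters(self):
        """Assumed to return (num trainable, num non-trainable) parameters."""
        ...

    @abstractmethod
    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
        ...


class MyModel(Model):
    def __init__(self, my_param=2.0):
        self.my_param = my_param

    def _forward(self, input_dict):
        return input_dict["obs"] * self.my_param

    # Trivial stubs: the doc example has no real parameters to count or set,
    # so it only needs to satisfy the abstract interface.
    def get_num_parameters(self):
        return (0, 0)

    def _set_to_dummy_weights(self, value_sequence=(-0.02, -0.01, 0.01, 0.02)):
        pass


print(MyModel()._forward({"obs": 3.0}))  # -> 6.0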

rllib/utils/actor_manager.py

Lines changed: 2 additions & 2 deletions
@@ -205,8 +205,8 @@ class FaultTolerantActorManager:

     @ray.remote
     class MyActor:
-        def apply(self, fn):
-            return fn(self)
+        def apply(self, func):
+            return func(self)

         def do_something(self):
             return True
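
The rename from `fn` to `func` aligns the docstring's `MyActor.apply()` with the parameter name the manager uses when dispatching calls (an inference from the diff). For context, a sketch of how this example is typically exercised, assuming the manager's documented `foreach_actor` API; result handling may differ across Ray versions:

import ray
from ray.rllib.utils.actor_manager import FaultTolerantActorManager


@ray.remote
class MyActor:
    def apply(self, func):
        # The manager ships `func` to the actor and applies it to `self`.
        return func(self)

    def do_something(self):
        return True


ray.init()
actors = [MyActor.remote() for _ in range(3)]
manager = FaultTolerantActorManager(actors=actors)

# Broadcast a call to all healthy actors and collect the results.
results = manager.foreach_actor(lambda actor: actor.do_something())
print([r.get() for r in results])  # -> [True, True, True]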
