diff --git a/doc/source/rllib/doc_code/checkpoints.py b/doc/source/rllib/doc_code/checkpoints.py
index e8ca8ad2b320..00cb1a58b004 100644
--- a/doc/source/rllib/doc_code/checkpoints.py
+++ b/doc/source/rllib/doc_code/checkpoints.py
@@ -9,7 +9,15 @@
 # Base config used for both pickle-based checkpoint and msgpack-based one.
-config = PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=0)
+config = (
+    PPOConfig()
+    .api_stack(
+        enable_rl_module_and_learner=False,
+        enable_env_runner_and_connector_v2=False,
+    )
+    .environment("CartPole-v1")
+    .env_runners(num_env_runners=0)
+)

 # Build algorithm object.
 algo1 = config.build()
diff --git a/doc/source/rllib/doc_code/custom_gym_env.py b/doc/source/rllib/doc_code/custom_gym_env.py
index ff659e22b1f4..925b212e4d2a 100644
--- a/doc/source/rllib/doc_code/custom_gym_env.py
+++ b/doc/source/rllib/doc_code/custom_gym_env.py
@@ -1,5 +1,6 @@
 # __rllib-custom-gym-env-begin__
 import gymnasium as gym
+import numpy as np

 import ray
 from ray.rllib.algorithms.ppo import PPOConfig
@@ -8,23 +9,23 @@
 class SimpleCorridor(gym.Env):
     def __init__(self, config):
         self.end_pos = config["corridor_length"]
-        self.cur_pos = 0
+        self.cur_pos = 0.0
         self.action_space = gym.spaces.Discrete(2)  # right/left
-        self.observation_space = gym.spaces.Discrete(self.end_pos)
+        self.observation_space = gym.spaces.Box(0.0, self.end_pos, shape=(1,))

     def reset(self, *, seed=None, options=None):
-        self.cur_pos = 0
-        return self.cur_pos, {}
+        self.cur_pos = 0.0
+        return np.array([self.cur_pos]), {}

     def step(self, action):
-        if action == 0 and self.cur_pos > 0:  # move right (towards goal)
-            self.cur_pos -= 1
+        if action == 0 and self.cur_pos > 0.0:  # move right (towards goal)
+            self.cur_pos -= 1.0
         elif action == 1:  # move left (towards start)
-            self.cur_pos += 1
+            self.cur_pos += 1.0
         if self.cur_pos >= self.end_pos:
-            return 0, 1.0, True, True, {}
+            return np.array([0.0]), 1.0, True, True, {}
         else:
-            return self.cur_pos, -0.1, False, False, {}
+            return np.array([self.cur_pos]), -0.1, False, False, {}


 ray.init()
diff --git a/doc/source/rllib/doc_code/rllib_in_60s.py b/doc/source/rllib/doc_code/rllib_in_60s.py
index a17de677cee0..6d214504f15d 100644
--- a/doc/source/rllib/doc_code/rllib_in_60s.py
+++ b/doc/source/rllib/doc_code/rllib_in_60s.py
@@ -2,20 +2,24 @@
 # __rllib-in-60s-begin__
 from ray.rllib.algorithms.ppo import PPOConfig
-from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
+from ray.rllib.connectors.env_to_module import FlattenObservations

-config = (  # 1. Configure the algorithm,
+# 1. Configure the algorithm,
+config = (
     PPOConfig()
-    .environment(env="Taxi-v3")
-    .env_runners(num_env_runners=2)
-    .rl_module(model_config=DefaultModelConfig(fcnet_hiddens=[64, 64]))
+    .environment("Taxi-v3")
+    .env_runners(
+        num_env_runners=2,
+        # Observations are discrete (ints) -> We need to flatten (one-hot) them.
+        env_to_module_connector=lambda env: FlattenObservations(),
+    )
     .evaluation(evaluation_num_env_runners=1)
 )
-
-algo = config.build()  # 2. build the algorithm,
-
+# 2. build the algorithm ..
+algo = config.build()
+# 3. .. train it ..
 for _ in range(5):
-    print(algo.train())  # 3. train it,
-
-algo.evaluate()  # 4. and evaluate it.
+    print(algo.train())
+# 4. .. and evaluate it.
+algo.evaluate() # __rllib-in-60s-end__ diff --git a/doc/source/rllib/doc_code/rllib_on_ray_readme.py b/doc/source/rllib/doc_code/rllib_on_ray_readme.py index 93fbac10814c..6498a1732a91 100644 --- a/doc/source/rllib/doc_code/rllib_on_ray_readme.py +++ b/doc/source/rllib/doc_code/rllib_on_ray_readme.py @@ -1,5 +1,8 @@ # __quick_start_begin__ import gymnasium as gym +import numpy as np +import torch + from ray.rllib.algorithms.ppo import PPOConfig @@ -19,9 +22,9 @@ class SimpleCorridor(gym.Env): def __init__(self, config): self.end_pos = config["corridor_length"] - self.cur_pos = 0 + self.cur_pos = 0.0 self.action_space = gym.spaces.Discrete(2) # left and right - self.observation_space = gym.spaces.Box(0.0, self.end_pos, shape=(1,)) + self.observation_space = gym.spaces.Box(0.0, self.end_pos, (1,), np.float32) def reset(self, *, seed=None, options=None): """Resets the episode. @@ -29,9 +32,9 @@ def reset(self, *, seed=None, options=None): Returns: Initial observation of the new episode and an info dict. """ - self.cur_pos = 0 + self.cur_pos = 0.0 # Return initial observation. - return [self.cur_pos], {} + return np.array([self.cur_pos], np.float32), {} def step(self, action): """Takes a single step in the episode given `action`. @@ -50,23 +53,24 @@ def step(self, action): truncated = False # +1 when goal reached, otherwise -1. reward = 1.0 if terminated else -0.1 - return [self.cur_pos], reward, terminated, truncated, {} + return np.array([self.cur_pos], np.float32), reward, terminated, truncated, {} # Create an RLlib Algorithm instance from a PPOConfig object. config = ( PPOConfig().environment( # Env class to use (here: our gym.Env sub-class from above). - env=SimpleCorridor, + SimpleCorridor, # Config dict to be passed to our custom env's constructor. # Use corridor with 20 fields (including S and G). - env_config={"corridor_length": 28}, + env_config={"corridor_length": 20}, ) # Parallelize environment rollouts. .env_runners(num_env_runners=3) ) # Construct the actual (PPO) algorithm object from the config. algo = config.build() +rl_module = algo.get_module() # Train for n iterations and report results (mean episode rewards). # Since we have to move at least 19 times in the env to reach the goal and @@ -74,7 +78,7 @@ def step(self, action): # Expect to reach an optimal episode reward of `-0.1*18 + 1.0 = -0.8`. for i in range(5): results = algo.train() - print(f"Iter: {i}; avg. return={results['env_runners']['episode_return_mean']}") + print(f"Iter: {i}; avg. results={results['env_runners']}") # Perform inference (action computations) based on given env observations. # Note that we are using a slightly different env here (len 10 instead of 20), @@ -89,7 +93,12 @@ def step(self, action): while not terminated and not truncated: # Compute a single action, given the current observation # from the environment. - action = algo.compute_single_action(obs) + action_logits = rl_module.forward_inference( + {"obs": torch.from_numpy(obs).unsqueeze(0)} + )["action_dist_inputs"].numpy()[ + 0 + ] # [0]: B=1 + action = np.argmax(action_logits) # Apply the computed action in the environment. obs, reward, terminated, truncated, info = env.step(action) # Sum up rewards for reporting purposes. 
diff --git a/doc/source/rllib/doc_code/rlmodule_guide.py b/doc/source/rllib/doc_code/rlmodule_guide.py index a9f94f494125..cc7eb92c19d1 100644 --- a/doc/source/rllib/doc_code/rlmodule_guide.py +++ b/doc/source/rllib/doc_code/rlmodule_guide.py @@ -8,15 +8,7 @@ from ray.rllib.algorithms.ppo import PPOConfig -config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .framework("torch") - .environment("CartPole-v1") -) +config = PPOConfig().framework("torch").environment("CartPole-v1") algorithm = config.build() @@ -235,21 +227,15 @@ def _forward_train(self, batch: Dict[str, Any]) -> Dict[str, Any]: class BCTorchRLModuleWithSharedGlobalEncoder(TorchRLModule): """An RLModule with a shared encoder between agents for global observation.""" - def __init__( - self, - encoder: nn.Module, - local_dim: int, - hidden_dim: int, - action_dim: int, - config=None, - ) -> None: - super().__init__(config=config) - - self.encoder = encoder + def setup(self): + self.encoder = self.model_config["encoder"] self.policy_head = nn.Sequential( - nn.Linear(hidden_dim + local_dim, hidden_dim), + nn.Linear( + self.model_config["hidden_dim"] + self.model_config["local_dim"], + self.model_config["hidden_dim"], + ), nn.ReLU(), - nn.Linear(hidden_dim, action_dim), + nn.Linear(self.model_config["hidden_dim"], self.model_config["action_dim"]), ) def _forward_inference(self, batch: Dict[str, Any]) -> Dict[str, Any]: @@ -288,11 +274,14 @@ def setup(self): rl_modules = {} for module_id, module_spec in module_specs.items(): rl_modules[module_id] = BCTorchRLModuleWithSharedGlobalEncoder( - config=module_specs[module_id].get_rl_module_config(), - encoder=shared_encoder, - local_dim=module_spec.observation_space["local"].shape[0], - hidden_dim=hidden_dim, - action_dim=module_spec.action_space.n, + observation_space=module_spec.observation_space, + action_space=module_spec.action_space, + model_config={ + "local_dim": module_spec.observation_space["local"].shape[0], + "hidden_dim": hidden_dim, + "action_dim": module_spec.action_space.n, + "encoder": shared_encoder, + }, ) self._rl_modules = rl_modules @@ -345,14 +334,7 @@ def setup(self): from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec -config = ( - PPOConfig() - # Enable the new API stack (RLModule and Learner APIs). - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ).environment("CartPole-v1") -) +config = PPOConfig().environment("CartPole-v1") env = gym.make("CartPole-v1") # Create an RL Module that we would like to checkpoint module_spec = RLModuleSpec( diff --git a/doc/source/rllib/doc_code/saving_and_loading_algos_and_policies.py b/doc/source/rllib/doc_code/saving_and_loading_algos_and_policies.py index 9202c904f5f9..0ba5e06b7775 100644 --- a/doc/source/rllib/doc_code/saving_and_loading_algos_and_policies.py +++ b/doc/source/rllib/doc_code/saving_and_loading_algos_and_policies.py @@ -4,7 +4,14 @@ # Create a PPO algorithm object using a config object .. from ray.rllib.algorithms.ppo import PPOConfig -my_ppo_config = PPOConfig().environment("CartPole-v1") +my_ppo_config = ( + PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment("CartPole-v1") +) my_ppo = my_ppo_config.build() # .. train one iteration .. 
@@ -60,21 +67,28 @@
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole

 # Set up a multi-agent Algorithm, training two policies independently.
-my_ma_config = PPOConfig().multi_agent(
-    # Which policies should RLlib create and train?
-    policies={"pol1", "pol2"},
-    # Let RLlib know, which agents in the environment (we'll have "agent1"
-    # and "agent2") map to which policies.
-    policy_mapping_fn=(
-        lambda agent_id, episode, worker, **kw: (
-            "pol1" if agent_id == "agent1" else "pol2"
-        )
-    ),
-    # Setting these isn't necessary. All policies will always be trained by default.
-    # However, since we do provide a list of IDs here, we need to remain in charge of
-    # changing this `policies_to_train` list, should we ever alter the Algorithm
-    # (e.g. remove one of the policies or add a new one).
-    policies_to_train=["pol1", "pol2"],  # Again, `None` would be totally fine here.
+my_ma_config = (
+    PPOConfig()
+    .api_stack(
+        enable_rl_module_and_learner=False,
+        enable_env_runner_and_connector_v2=False,
+    )
+    .multi_agent(
+        # Which policies should RLlib create and train?
+        policies={"pol1", "pol2"},
+        # Let RLlib know which agents in the environment (we'll have "agent1"
+        # and "agent2") map to which policies.
+        policy_mapping_fn=(
+            lambda agent_id, episode, worker, **kw: (
+                "pol1" if agent_id == "agent1" else "pol2"
+            )
+        ),
+        # Setting these isn't necessary. All policies will always be trained by default.
+        # However, since we do provide a list of IDs here, we need to remain in charge of
+        # changing this `policies_to_train` list, should we ever alter the Algorithm
+        # (e.g. remove one of the policies or add a new one).
+        policies_to_train=["pol1", "pol2"],  # Again, `None` would be totally fine here.
+    )
 )

 # Add the MultiAgentCartPole env to our config and build our Algorithm.
@@ -168,6 +182,10 @@
 # Set up an Algorithm with 5 Policies.
 algo_w_5_policies = (
     PPOConfig()
+    .api_stack(
+        enable_rl_module_and_learner=False,
+        enable_env_runner_and_connector_v2=False,
+    )
     .environment(
         env=MultiAgentCartPole,
         env_config={
@@ -225,7 +243,13 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):
 # Create a new Algorithm (which contains a Policy, which contains a NN Model).
 # Switch on for native models to be included in the Policy checkpoints.
 ppo_config = (
-    PPOConfig().environment("Pendulum-v1").checkpointing(export_native_model_files=True)
+    PPOConfig()
+    .api_stack(
+        enable_rl_module_and_learner=False,
+        enable_env_runner_and_connector_v2=False,
+    )
+    .environment("Pendulum-v1")
+    .checkpointing(export_native_model_files=True)
 )

 # The default framework is TensorFlow, but if you would like to do this example with
diff --git a/doc/source/rllib/key-concepts.rst b/doc/source/rllib/key-concepts.rst
index 9efd1d86a3c9..25f1e21c9642 100644
--- a/doc/source/rllib/key-concepts.rst
+++ b/doc/source/rllib/key-concepts.rst
@@ -73,7 +73,15 @@ which implements the proximal policy optimization algorithm in RLlib.
     # Configure.
     from ray.rllib.algorithms.ppo import PPOConfig

-    config = PPOConfig().environment(env="CartPole-v1").training(train_batch_size=4000)
+    config = (
+        PPOConfig()
+        .api_stack(
+            enable_rl_module_and_learner=True,
+            enable_env_runner_and_connector_v2=True,
+        )
+        .environment("CartPole-v1")
+        .training(train_batch_size_per_learner=4000)
+    )

     # Build.
     algo = config.build()
@@ -91,7 +99,15 @@
     # Configure.
from ray.rllib.algorithms.ppo import PPOConfig - config = PPOConfig().environment(env="CartPole-v1").training(train_batch_size=4000) + config = ( + PPOConfig() + .api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) + .environment("CartPole-v1") + .training(train_batch_size_per_learner=4000) + ) # Train via Ray Tune. tune.run("PPO", config=config) diff --git a/doc/source/rllib/rllib-learner.rst b/doc/source/rllib/rllib-learner.rst index 712a24146054..ff75cfe45859 100644 --- a/doc/source/rllib/rllib-learner.rst +++ b/doc/source/rllib/rllib-learner.rst @@ -57,10 +57,6 @@ arguments in the :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConf config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .learners( num_learners=0, # Set this to greater than 1 to allow for DDP style updates. num_gpus_per_learner=0, # Set this to 1 to enable GPU training. @@ -177,6 +173,9 @@ and :py:class:`~ray.rllib.core.learner.learner.Learner` APIs via the :py:class:` # Construct a new Learner using our config object. learner = config.build_learner(env=env) + # Needs to be called on the learner before calling any functions. + learner.build() + Updates ------- @@ -217,8 +216,8 @@ Updates } default_batch = SampleBatch(DUMMY_BATCH) DUMMY_BATCH = default_batch.as_multi_agent() - - learner.build() # needs to be called on the learner before calling any functions + # Make sure, we convert the batch to the correct framework (here: torch). + DUMMY_BATCH = learner._convert_batch_type(DUMMY_BATCH) .. tab-set:: diff --git a/rllib/BUILD b/rllib/BUILD index f040dbab4e73..080d42f883df 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -772,37 +772,6 @@ py_test( args = ["--as-test", "--enable-new-api-stack", "--num-agents=2", "--num-gpus=2"] ) -#@OldAPIStack -py_test( - name = "learning_tests_pendulum_ppo_old_api_stack", - main = "tests/run_regression_tests.py", - tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_continuous", "no_tf_static_graph"], - size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer - srcs = ["tests/run_regression_tests.py"], - data = ["tuned_examples/ppo/pendulum-ppo.yaml"], - args = ["--dir=tuned_examples/ppo"] -) -#@OldAPIStack -py_test( - name = "learning_tests_transformed_actions_pendulum_ppo_old_api_stack", - main = "tests/run_regression_tests.py", - tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_continuous", "no_tf_static_graph"], - size = "large", # bazel may complain about it being too long sometimes - large is on purpose as some frameworks take longer - srcs = ["tests/run_regression_tests.py"], - data = ["tuned_examples/ppo/pendulum-transformed-actions-ppo.yaml"], - args = ["--dir=tuned_examples/ppo"] -) -#@OldAPIStack -py_test( - name = "learning_tests_repeat_after_me_ppo_old_api_stack", - main = "tests/run_regression_tests.py", - tags = ["team:rllib", "exclusive", "learning_tests", "learning_tests_discrete"], - size = "medium", - srcs = ["tests/run_regression_tests.py"], - data = ["tuned_examples/ppo/repeatafterme-ppo-lstm.yaml"], - args = ["--dir=tuned_examples/ppo"] -) - # SAC # Pendulum py_test( @@ -2078,13 +2047,6 @@ py_test( srcs = ["tests/test_placement_groups.py"] ) -py_test( - name = "tests/test_ray_client", - tags = ["team:rllib", "tests_dir"], - size = "large", - srcs = ["tests/test_ray_client.py"] -) - py_test( name = "tests/test_reproducibility", tags = ["team:rllib", 
"tests_dir"], @@ -3173,42 +3135,6 @@ py_test( args = ["--as-test", "--framework=torch", "--stop-reward=7.2"] ) -py_test( - name = "examples/custom_recurrent_rnn_tokenizer_repeat_after_me_tf2", - main = "examples/custom_recurrent_rnn_tokenizer.py", - tags = ["team:rllib", "exclusive", "examples"], - size = "medium", - srcs = ["examples/custom_recurrent_rnn_tokenizer.py"], - args = ["--as-test", "--framework=tf2", "--stop-reward=40", "--env=RepeatAfterMeEnv", "--num-cpus=4"] -) - -py_test( - name = "examples/custom_recurrent_rnn_tokenizer_repeat_initial_obs_env_tf2", - main = "examples/custom_recurrent_rnn_tokenizer.py", - tags = ["team:rllib", "examples"], - size = "medium", - srcs = ["examples/custom_recurrent_rnn_tokenizer.py"], - args = ["--as-test", "--framework=tf2", "--stop-reward=10", "--stop-timesteps=300000", "--env=RepeatInitialObsEnv", "--num-cpus=4"] -) - -py_test( - name = "examples/custom_recurrent_rnn_tokenizer_repeat_after_me_torch", - main = "examples/custom_recurrent_rnn_tokenizer.py", - tags = ["team:rllib", "exclusive", "examples"], - size = "medium", - srcs = ["examples/custom_recurrent_rnn_tokenizer.py"], - args = ["--as-test", "--framework=torch", "--stop-reward=40", "--env=RepeatAfterMeEnv", "--num-cpus=4"] -) - -py_test( - name = "examples/custom_recurrent_rnn_tokenizer_repeat_initial_obs_env_torch", - main = "examples/custom_recurrent_rnn_tokenizer.py", - tags = ["team:rllib", "exclusive", "examples"], - size = "medium", - srcs = ["examples/custom_recurrent_rnn_tokenizer.py"], - args = ["--as-test", "--framework=torch", "--stop-reward=10", "--stop-timesteps=300000", "--env=RepeatInitialObsEnv", "--num-cpus=4"] -) - py_test( name = "examples/replay_buffer_api", tags = ["team:rllib", "examples"], diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py index 863e06eec904..15cd2d81d9d7 100644 --- a/rllib/algorithms/algorithm.py +++ b/rllib/algorithms/algorithm.py @@ -2544,15 +2544,6 @@ def export_policy_model( onnx: If given, will export model in ONNX format. The value of this parameter set the ONNX OpSet version to use. If None, the output format will be DL framework specific. - - .. testcode:: - - from ray.rllib.algorithms.ppo import PPO, PPOConfig - config = PPOConfig().environment("CartPole-v1") - algo = PPO(config=config) - algo.train() - algo.export_policy_checkpoint("/tmp/export_dir") - algo.export_policy_model("/tmp/dir") """ self.get_policy(policy_id).export_model(export_dir, onnx) @@ -2573,14 +2564,6 @@ def export_policy_checkpoint( Raises: KeyError: if `policy_id` cannot be found in this Algorithm. - - .. 
testcode::
-
-            from ray.rllib.algorithms.ppo import PPO, PPOConfig
-            config = PPOConfig().environment("CartPole-v1")
-            algo = PPO(config=config)
-            algo.train()
-            algo.export_policy_checkpoint("/tmp/export_dir")
         """
         policy = self.get_policy(policy_id)
         if policy is None:
diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index d444c4347683..5bc6daca9db7 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -19,6 +19,7 @@
 )

 import gymnasium as gym
+import tree
 from packaging import version

 import ray
@@ -58,6 +59,7 @@
     deserialize_type,
     serialize_type,
 )
+from ray.rllib.utils.test_utils import check
 from ray.rllib.utils.torch_utils import TORCH_COMPILE_REQUIRED_VERSION
 from ray.rllib.utils.typing import (
     AgentID,
@@ -701,12 +703,18 @@ def update_from_dict(
         # Namely, we want to re-instantiate the exploration config this config had
         # inside `self.experimental()` before potentially overwriting it in the
         # following.
-        enable_rl_module_and_learner = config_dict.get(
+        enable_new_api_stack = config_dict.get(
             "_enable_new_api_stack",
-            config_dict.get("enable_rl_module_and_learner"),
+            config_dict.get(
+                "enable_rl_module_and_learner",
+                config_dict.get("enable_env_runner_and_connector_v2"),
+            ),
         )
-        if enable_rl_module_and_learner:
-            self.api_stack(enable_rl_module_and_learner=enable_rl_module_and_learner)
+        if enable_new_api_stack is not None:
+            self.api_stack(
+                enable_rl_module_and_learner=enable_new_api_stack,
+                enable_env_runner_and_connector_v2=enable_new_api_stack,
+            )

         # Modify our properties one by one.
         for key, value in config_dict.items():
@@ -750,7 +758,7 @@ def update_from_dict(
             elif key.startswith("evaluation_"):
                 eval_call[key] = value
             elif key == "exploration_config":
-                if enable_rl_module_and_learner:
+                if enable_new_api_stack:
                     self.exploration_config = value
                     continue
                 if isinstance(value, dict) and "type" in value:
@@ -4420,6 +4428,7 @@ def _validate_input_settings(self):

     def _validate_new_api_stack_settings(self):
         """Checks, whether settings related to the new API stack make sense."""
+        # Old API stack checks.
         if not self.enable_rl_module_and_learner:
             # Throw a warning if the user has used `self.rl_module(rl_module_spec=...)`
             # but has not enabled the new API stack at the same time.
@@ -4462,6 +4471,26 @@ def _validate_new_api_stack_settings(self):
                 "to False (old API stack), instead."
             )

+        # For those users who accidentally use the new API stack (because it's the
+        # default now for many algos), we need to make sure they are warned.
+        try:
+            tree.assert_same_structure(self.model, MODEL_DEFAULTS)
+            # Create copies excluding the specified key.
+            check(
+                {k: v for k, v in self.model.items() if k != "vf_share_layers"},
+                {k: v for k, v in MODEL_DEFAULTS.items() if k != "vf_share_layers"},
+            )
+        except Exception:
+            logger.warning(
+                "You configured a custom `model` config (probably through calling "
+                "`config.training(model=..)`), whereas your config uses the new API "
+                "stack! In order to switch off the new API stack, set in your config: "
+                "`config.api_stack(enable_rl_module_and_learner=False, "
+                "enable_env_runner_and_connector_v2=False)`. If you DO want to use "
+                "the new API stack, configure your model instead through: "
+                "`config.rl_module(model_config={..})`."
+            )
+
         # LR-schedule checking.
Scheduler.validate( fixed_value_or_schedule=self.lr, diff --git a/rllib/algorithms/bc/torch/bc_torch_rl_module.py b/rllib/algorithms/bc/torch/bc_torch_rl_module.py index d06c323b124e..bcdd3660e194 100644 --- a/rllib/algorithms/bc/torch/bc_torch_rl_module.py +++ b/rllib/algorithms/bc/torch/bc_torch_rl_module.py @@ -10,6 +10,8 @@ class BCTorchRLModule(TorchRLModule): @override(RLModule) def setup(self): + if self.catalog is None and hasattr(self, "_catalog_ctor_error"): + raise self._catalog_ctor_error # __sphinx_doc_begin__ # Build models from catalog. self.encoder = self.catalog.build_encoder(framework=self.framework) diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py index 91ca34450f6f..622718055e37 100644 --- a/rllib/algorithms/dqn/dqn.py +++ b/rllib/algorithms/dqn/dqn.py @@ -427,11 +427,12 @@ def validate(self) -> None: # Warn about new API stack on by default. if self.enable_rl_module_and_learner: logger.warning( - "You are running DQN on the new API stack! This is the new default " - "behavior for this algorithm. If you don't want to use the new API " - "stack, set `config.api_stack(enable_rl_module_and_learner=False, " - "enable_env_runner_and_connector_v2=False)`. For a detailed " - "migration guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html" # noqa + f"You are running {self.algo_class.__name__} on the new API stack! " + "This is the new default behavior for this algorithm. If you don't " + "want to use the new API stack, set `config.api_stack(" + "enable_rl_module_and_learner=False," + "enable_env_runner_and_connector_v2=False)`. For a detailed migration " + "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html" # noqa ) if ( diff --git a/rllib/algorithms/dqn/dqn_rainbow_rl_module.py b/rllib/algorithms/dqn/dqn_rainbow_rl_module.py index c6dbafead5ae..2d7c1f97c0a8 100644 --- a/rllib/algorithms/dqn/dqn_rainbow_rl_module.py +++ b/rllib/algorithms/dqn/dqn_rainbow_rl_module.py @@ -29,6 +29,9 @@ class DQNRainbowRLModule(RLModule, InferenceOnlyAPI, TargetNetworkAPI): @override(RLModule) def setup(self): + if self.catalog is None and hasattr(self, "_catalog_ctor_error"): + raise self._catalog_ctor_error + # If a dueling architecture is used. self.uses_dueling: bool = self.model_config.get("dueling") # If double Q learning is used. diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py index 6077bd35ae91..172411d9276f 100644 --- a/rllib/algorithms/ppo/ppo.py +++ b/rllib/algorithms/ppo/ppo.py @@ -70,11 +70,6 @@ class PPOConfig(AlgorithmConfig): from ray.rllib.algorithms.ppo import PPOConfig config = PPOConfig() - # Activate new API stack. - config.api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) config.environment("CartPole-v1") config.env_runners(num_env_runners=1) config.training( @@ -93,11 +88,6 @@ class PPOConfig(AlgorithmConfig): config = ( PPOConfig() - # Activate new API stack. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # Set the config object's env. .environment(env="CartPole-v1") # Update the config object's training parameters. @@ -122,6 +112,16 @@ def __init__(self, algo_class=None): """Initializes a PPOConfig instance.""" super().__init__(algo_class=algo_class or PPO) + self.exploration_config = { + # The Exploration class to use. In the simplest case, this is the name + # (str) of any class present in the `rllib.utils.exploration` package. 
+ # You can also provide the python class directly or the full location + # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. + # EpsilonGreedy"). + "type": "StochasticSampling", + # Add constructor kwargs here (if any). + } + # fmt: off # __sphinx_doc_begin__ self.lr_schedule = None @@ -149,6 +149,12 @@ def __init__(self, algo_class=None): # Override some of AlgorithmConfig's default values with PPO-specific values. self.num_env_runners = 2 self.model["vf_share_layers"] = False + + # `.api_stack()` + self.api_stack( + enable_rl_module_and_learner=True, + enable_env_runner_and_connector_v2=True, + ) # __sphinx_doc_end__ # fmt: on @@ -156,16 +162,6 @@ def __init__(self, algo_class=None): self.sgd_minibatch_size = DEPRECATED_VALUE self.vf_share_layers = DEPRECATED_VALUE - self.exploration_config = { - # The Exploration class to use. In the simplest case, this is the name - # (str) of any class present in the `rllib.utils.exploration` package. - # You can also provide the python class directly or the full location - # of your class (e.g. "ray.rllib.utils.exploration.epsilon_greedy. - # EpsilonGreedy"). - "type": "StochasticSampling", - # Add constructor kwargs here (if any). - } - @override(AlgorithmConfig) def get_default_rl_module_spec(self) -> RLModuleSpec: from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog @@ -304,6 +300,17 @@ def validate(self) -> None: # Call super's validation method. super().validate() + # Warn about new API stack on by default. + if self.enable_rl_module_and_learner: + logger.warning( + f"You are running {self.algo_class.__name__} on the new API stack! " + "This is the new default behavior for this algorithm. If you don't " + "want to use the new API stack, set `config.api_stack(" + "enable_rl_module_and_learner=False," + "enable_env_runner_and_connector_v2=False)`. For a detailed migration " + "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html" # noqa + ) + # Synchronous sampling, on-policy/PPO algos -> Check mismatches between # `rollout_fragment_length` and `train_batch_size_per_learner` to avoid user # confusion. diff --git a/rllib/algorithms/ppo/ppo_rl_module.py b/rllib/algorithms/ppo/ppo_rl_module.py index 30ca5d843df1..833e8d9d4227 100644 --- a/rllib/algorithms/ppo/ppo_rl_module.py +++ b/rllib/algorithms/ppo/ppo_rl_module.py @@ -19,6 +19,9 @@ class PPORLModule(RLModule, InferenceOnlyAPI, ValueFunctionAPI, abc.ABC): @override(RLModule) def setup(self): + if self.catalog is None and hasattr(self, "_catalog_ctor_error"): + raise self._catalog_ctor_error + # __sphinx_doc_begin__ # If we have a stateful model, states for the critic need to be collected # during sampling and `inference-only` needs to be `False`. Note, at this diff --git a/rllib/algorithms/ppo/tests/test_ppo.py b/rllib/algorithms/ppo/tests/test_ppo.py index 3febf97fb2ca..575bcece9897 100644 --- a/rllib/algorithms/ppo/tests/test_ppo.py +++ b/rllib/algorithms/ppo/tests/test_ppo.py @@ -66,11 +66,6 @@ def test_ppo_compilation_and_schedule_mixins(self): # Build a PPOConfig object with the `SingleAgentEnvRunner` class. config = ( ppo.PPOConfig() - # Enable new API stack and use EnvRunner. 
- .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(num_env_runners=0) .training( num_epochs=2, @@ -93,12 +88,10 @@ def test_ppo_compilation_and_schedule_mixins(self): num_iterations = 2 - # TODO (sven) Bring back "FrozenLake-v1" for env in [ - # "CliffWalking-v0", "CartPole-v1", "Pendulum-v1", - ]: # "ale_py:ALE/Breakout-v5"]: + ]: print("Env={}".format(env)) for lstm in [False]: print("LSTM={}".format(lstm)) @@ -132,10 +125,6 @@ def test_ppo_free_log_std(self): """Tests the free log std option works.""" config = ( ppo.PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("Pendulum-v1") .env_runners( num_env_runners=1, diff --git a/rllib/algorithms/ppo/tests/test_ppo_learner.py b/rllib/algorithms/ppo/tests/test_ppo_learner.py index 69ceab171497..1d5f83639bb9 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_learner.py +++ b/rllib/algorithms/ppo/tests/test_ppo_learner.py @@ -52,10 +52,6 @@ def test_save_to_path_and_restore_from_path(self): """Tests saving and loading the state of the PPO Learner Group.""" config = ( ppo.PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=0, @@ -92,10 +88,6 @@ def test_kl_coeff_changes(self): initial_kl_coeff = 0.01 config = ( ppo.PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=0, diff --git a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py index edb2b3b3122e..c55bf2445b92 100644 --- a/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py +++ b/rllib/algorithms/ppo/tests/test_ppo_old_api_stack.py @@ -125,6 +125,10 @@ def test_ppo_compilation_w_connectors(self): # Build a PPOConfig object. config = ( ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .training( num_epochs=2, # Setup lr schedule for testing. @@ -190,6 +194,10 @@ def test_ppo_compilation_and_schedule_mixins(self): # Build a PPOConfig object. config = ( ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .training( # Setup lr schedule for testing. 
lr_schedule=[[0, 5e-5], [256, 0.0]], @@ -255,6 +263,10 @@ def test_ppo_exploration_setup(self): """Tests, whether PPO runs with different exploration setups.""" config = ( ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( "FrozenLake-v1", env_config={"is_slippery": False, "map_name": "4x4"}, @@ -303,6 +315,10 @@ def test_ppo_free_log_std(self): config = ( ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .env_runners( num_env_runners=0, @@ -353,6 +369,10 @@ def test_ppo_loss_function(self): """ config = ( ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .env_runners( num_env_runners=0, diff --git a/rllib/algorithms/sac/sac_rl_module.py b/rllib/algorithms/sac/sac_rl_module.py index 832df79e9ede..d6d1b783d326 100644 --- a/rllib/algorithms/sac/sac_rl_module.py +++ b/rllib/algorithms/sac/sac_rl_module.py @@ -54,6 +54,9 @@ class SACRLModule(RLModule, InferenceOnlyAPI, TargetNetworkAPI): @override(RLModule) def setup(self): + if self.catalog is None and hasattr(self, "_catalog_ctor_error"): + raise self._catalog_ctor_error + # If a twin Q architecture should be used. self.twin_q = self.model_config["twin_q"] diff --git a/rllib/algorithms/sac/tests/test_sac.py b/rllib/algorithms/sac/tests/test_sac.py index b9f0eba34ec8..53c5749f7966 100644 --- a/rllib/algorithms/sac/tests/test_sac.py +++ b/rllib/algorithms/sac/tests/test_sac.py @@ -7,11 +7,6 @@ from ray.rllib.algorithms import sac from ray.rllib.connectors.env_to_module.flatten_observations import FlattenObservations from ray.rllib.examples.envs.classes.random_env import RandomEnv -from ray.rllib.examples._old_api_stack.models.batch_norm_model import ( - KerasBatchNormModel, - TorchBatchNormModel, -) -from ray.rllib.models.catalog import ModelCatalog from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.spaces.simplex import Simplex from ray.rllib.utils.test_utils import check_train_results_new_api_stack @@ -80,9 +75,6 @@ def test_sac_compilation(self): ) num_iterations = 1 - ModelCatalog.register_custom_model("batch_norm", KerasBatchNormModel) - ModelCatalog.register_custom_model("batch_norm_torch", TorchBatchNormModel) - image_space = Box(-1.0, 1.0, shape=(84, 84, 3)) simple_space = Box(-1.0, 1.0, shape=(3,)) diff --git a/rllib/algorithms/tests/test_algorithm.py b/rllib/algorithms/tests/test_algorithm.py index aba304cc3e16..2175eb62091f 100644 --- a/rllib/algorithms/tests/test_algorithm.py +++ b/rllib/algorithms/tests/test_algorithm.py @@ -3,7 +3,6 @@ import os from pathlib import Path from random import choice -import time import unittest import ray @@ -27,7 +26,6 @@ LEARNER_RESULTS, ) from ray.rllib.utils.metrics.learner_info import LEARNER_INFO -from ray.rllib.utils.test_utils import check from ray.tune import register_env @@ -44,10 +42,6 @@ def tearDownClass(cls): def test_add_module_and_remove_module(self): config = ( ppo.PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment( env="multi_cart", env_config={"num_agents": 4}, @@ -213,6 +207,10 @@ def new_mapping_fn(agent_id, episode, **kwargs): def test_add_policy_and_remove_policy(self): config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) 
.environment( env=MultiAgentCartPole, env_config={ @@ -485,6 +483,10 @@ def test_evaluation_wo_evaluation_env_runner_group(self): # configured exact number of episodes per evaluation. config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment(env="CartPole-v1") .callbacks(callbacks_class=AssertEvalCallback) ) @@ -513,29 +515,6 @@ def test_evaluation_wo_evaluation_env_runner_group(self): algo_w_env_on_local_worker.stop() config.create_env_on_local_worker = False - def test_worker_validation_time(self): - """Tests the time taken by `validate_env_runners_after_construction=True`.""" - config = ppo.PPOConfig().environment(env="CartPole-v1") - config.validate_env_runners_after_construction = True - - # Test, whether validating one worker takes just as long as validating - # >> 1 workers. - config.num_env_runners = 1 - t0 = time.time() - algo = config.build() - total_time_1 = time.time() - t0 - print(f"Validating w/ 1 worker: {total_time_1}sec") - algo.stop() - - config.num_env_runners = 5 - t0 = time.time() - algo = config.build() - total_time_5 = time.time() - t0 - print(f"Validating w/ 5 workers: {total_time_5}sec") - algo.stop() - - check(total_time_5 / total_time_1, 1.0, atol=1.0) - def test_no_env_but_eval_workers_do_have_env(self): """Tests whether no env on workers, but env on eval workers works ok.""" script_path = Path(__file__) @@ -570,7 +549,14 @@ def test_no_env_but_eval_workers_do_have_env(self): def test_counters_after_checkpoint(self): # We expect algorithm to no start counters from zero after loading a # checkpoint on a fresh Algorithm instance - config = ppo.PPOConfig().environment(env="CartPole-v1") + config = ( + ppo.PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment(env="CartPole-v1") + ) algo = config.build() self.assertTrue(all(c == 0 for c in algo._counters.values())) diff --git a/rllib/algorithms/tests/test_algorithm_config.py b/rllib/algorithms/tests/test_algorithm_config.py index b88f16636698..36da463d43a9 100644 --- a/rllib/algorithms/tests/test_algorithm_config.py +++ b/rllib/algorithms/tests/test_algorithm_config.py @@ -169,15 +169,7 @@ def test_detect_atari_env(self): self.assertFalse(config.is_atari) def test_rl_module_api(self): - config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .environment("CartPole-v1") - .framework("torch") - ) + config = PPOConfig().environment("CartPole-v1").framework("torch") self.assertEqual(config.rl_module_spec.module_class, PPOTorchRLModule) @@ -231,14 +223,7 @@ def test_config_per_module(self): self.assertTrue(config_3 is config) def test_learner_api(self): - config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .environment("CartPole-v1") - ) + config = PPOConfig().environment("CartPole-v1") self.assertEqual(config.learner_class, PPOTorchLearner) diff --git a/rllib/algorithms/tests/test_algorithm_rl_module_restore.py b/rllib/algorithms/tests/test_algorithm_rl_module_restore.py index b9979da368d3..a4b0f7720937 100644 --- a/rllib/algorithms/tests/test_algorithm_rl_module_restore.py +++ b/rllib/algorithms/tests/test_algorithm_rl_module_restore.py @@ -49,10 +49,6 @@ def policy_mapping_fn(agent_id, episode, **kwargs): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) 
.env_runners(rollout_fragment_length=4) .learners(**scaling_config) .environment(MultiAgentCartPole, env_config={"num_agents": num_agents}) @@ -184,10 +180,6 @@ def test_e2e_load_rl_module(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(rollout_fragment_length=4) .learners(**scaling_config) .environment("CartPole-v1") diff --git a/rllib/algorithms/tests/test_callbacks_old_api_stack.py b/rllib/algorithms/tests/test_callbacks_old_api_stack.py index 0d72cd7abceb..c96bde5a7c51 100644 --- a/rllib/algorithms/tests/test_callbacks_old_api_stack.py +++ b/rllib/algorithms/tests/test_callbacks_old_api_stack.py @@ -70,6 +70,10 @@ def tearDownClass(cls): def test_episode_and_sample_callbacks(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .env_runners(num_env_runners=0) .callbacks(EpisodeAndSampleCallbacks) @@ -88,7 +92,12 @@ def test_episode_and_sample_callbacks(self): def test_on_sub_environment_created(self): config = ( - PPOConfig().environment("CartPole-v1") + PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment("CartPole-v1") # Create 4 sub-environments per remote worker. # Create 2 remote workers. .env_runners(num_envs_per_env_runner=4, num_env_runners=2) @@ -121,6 +130,10 @@ def test_on_sub_environment_created(self): def test_on_sub_environment_created_with_remote_envs(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .env_runners( # Make each sub-environment a ray actor. @@ -162,6 +175,10 @@ def test_on_episode_created(self): # starts. 
config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( RandomEnv, env_config={ diff --git a/rllib/algorithms/tests/test_callbacks_on_env_runner.py b/rllib/algorithms/tests/test_callbacks_on_env_runner.py index ae8443b5b811..b977022cec9f 100644 --- a/rllib/algorithms/tests/test_callbacks_on_env_runner.py +++ b/rllib/algorithms/tests/test_callbacks_on_env_runner.py @@ -94,10 +94,6 @@ def tearDownClass(cls): def test_episode_and_sample_callbacks_batch_mode_truncate_episodes(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=0, @@ -146,10 +142,6 @@ def test_episode_and_sample_callbacks_batch_mode_truncate_episodes(self): def test_episode_and_sample_callbacks_batch_mode_complete_episodes(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( batch_mode="complete_episodes", @@ -195,23 +187,12 @@ def test_episode_and_sample_callbacks_batch_mode_complete_episodes(self): def test_overriding_on_episode_created_throws_error_on_new_api_stack(self): """Tests whether overriding `on_episode_created` raises error w/ SAEnvRunner.""" - config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .callbacks(OnEpisodeCreatedCallback) - ) + config = PPOConfig().callbacks(OnEpisodeCreatedCallback) self.assertRaises(ValueError, lambda: config.validate()) def test_tune_trial_id_visible_in_callbacks(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("multi_cart", env_config={"num_agents": 2}) .callbacks(OnEnvironmentCreatedCallback) .multi_agent( diff --git a/rllib/algorithms/tests/test_env_runner_failures.py b/rllib/algorithms/tests/test_env_runner_failures.py index 45308d1efaca..5fedec14eb0c 100644 --- a/rllib/algorithms/tests/test_env_runner_failures.py +++ b/rllib/algorithms/tests/test_env_runner_failures.py @@ -392,12 +392,7 @@ def _do_test_failing_recover(self, config, multi_agent=False): def test_fatal_single_agent(self): # Test the case where all workers fail (w/o recovery). self._do_test_failing_fatal( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .env_runners( + PPOConfig().env_runners( env_to_module_connector=lambda env: FlattenObservations(), ) ) @@ -405,12 +400,9 @@ def test_fatal_single_agent(self): def test_fatal_multi_agent(self): # Test the case where all workers fail (w/o recovery). 
self._do_test_failing_fatal( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .multi_agent(policies={"p0"}, policy_mapping_fn=lambda *a, **k: "p0"), + PPOConfig().multi_agent( + policies={"p0"}, policy_mapping_fn=lambda *a, **k: "p0" + ), ) def test_async_samples(self): @@ -436,10 +428,6 @@ def test_sync_replay(self): def test_multi_gpu(self): self._do_test_failing_ignore( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(env_runner_cls=ForwardHealthCheckToEnvWorker) .training( train_batch_size=10, @@ -451,10 +439,6 @@ def test_multi_gpu(self): def test_sync_samples(self): self._do_test_failing_ignore( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(env_runner_cls=ForwardHealthCheckToEnvWorker) .training(optimizer={}) ) @@ -471,10 +455,6 @@ def test_env_crash_during_sampling_but_restart_crashed_sub_envs(self): config = ( PPOConfig() - .api_stack( - enable_env_runner_and_connector_v2=True, - enable_rl_module_and_learner=True, - ) .env_runners(num_env_runners=4) .fault_tolerance( # Re-start failed individual sub-envs (then continue). @@ -520,10 +500,6 @@ def test_eval_workers_failing_ignore(self): # Test the case where one eval worker fails, but we chose to ignore. self._do_test_failing_ignore( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(env_runner_cls=ForwardHealthCheckToEnvWorker) .training(model={"fcnet_hiddens": [4]}), fail_eval=True, @@ -533,10 +509,6 @@ def test_eval_workers_parallel_to_training_failing_recover(self): # Test the case where all eval workers fail, but we chose to recover. config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(env_runner_cls=ForwardHealthCheckToEnvWorker) .evaluation( evaluation_num_env_runners=1, @@ -556,10 +528,6 @@ def test_eval_workers_parallel_to_training_multi_agent_failing_recover( # to recover. config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners(env_runner_cls=ForwardHealthCheckToEnvWorkerMultiAgent) .multi_agent( policies={"main", "p0", "p1"}, @@ -595,10 +563,6 @@ def test_workers_failing_recover(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( env_runner_cls=ForwardHealthCheckToEnvWorker, num_env_runners=2, @@ -654,10 +618,6 @@ def test_modules_are_restored_on_recovered_worker(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( env_runner_cls=ForwardHealthCheckToEnvWorkerMultiAgent, num_env_runners=2, @@ -763,10 +723,6 @@ def test_eval_workers_failing_recover(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( env_runner_cls=ForwardHealthCheckToEnvWorker, num_env_runners=2, @@ -904,7 +860,11 @@ def test_eval_workers_on_infinite_episodes(self): # horizon -> Expect warning and no proper evaluation results. 
config = ( PPOConfig() - .environment(env=RandomEnv, env_config={"p_terminated": 0.0}) + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .environment(RandomEnv, env_config={"p_terminated": 0.0}) .training(train_batch_size_per_learner=200) .evaluation( evaluation_num_env_runners=1, diff --git a/rllib/algorithms/tests/test_node_failures.py b/rllib/algorithms/tests/test_node_failures.py index dbac2e995f87..cd1ebbf0722c 100644 --- a/rllib/algorithms/tests/test_node_failures.py +++ b/rllib/algorithms/tests/test_node_failures.py @@ -53,10 +53,6 @@ def test_node_failure_ignore(self): # with fewer EnvRunners. config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=6, @@ -74,10 +70,6 @@ def test_node_failure_recreate_env_runners(self): # We recreate failed EnvRunners and continue training. config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=6, @@ -95,10 +87,6 @@ def test_node_failure_expect_crash(self): # We do not ignore EnvRunner failures and expect to crash upon failure. config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .env_runners( num_env_runners=6, diff --git a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py index 23c0cba79676..c65ba67ab43d 100644 --- a/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py +++ b/rllib/benchmarks/torch_compile/run_ppo_with_inference_bm.py @@ -1,8 +1,12 @@ import argparse +import gymnasium as gym + from ray import tune, air from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.ppo import PPOConfig +from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack +from ray.tune.registry import register_env # Note: # To run this benchmark you need to have a ray cluster of at least @@ -26,10 +30,18 @@ def _parse_args(): def main(pargs): + # Register our environment with tune. 
+ def _env_creator(cfg): + return wrap_atari_for_new_api_stack( + gym.make("ale_py:ALE/Breakout-v5", **cfg), framestack=4 + ) + + register_env("env", _env_creator) + config = ( PPOConfig() .environment( - "ale_py:ALE/Breakout-v5", + "env", clip_rewards=True, env_config={ "frameskip": 1, @@ -80,9 +92,9 @@ def main(pargs): results = tuner.fit() - compiled_throughput = results[0].metrics["num_env_steps_sampled_throughput_per_sec"] - eager_throughput = results[1].metrics["num_env_steps_sampled_throughput_per_sec"] - print(f"Speed up (%): {100 * (compiled_throughput / eager_throughput - 1)}") + compiled_timer = results[0].metrics["timers"]["env_runner_sampling_timer"] + eager_timer = results[1].metrics["timers"]["env_runner_sampling_timer"] + print(f"Speed up (%): {100 * (1 - compiled_timer / eager_timer)}") if __name__ == "__main__": diff --git a/rllib/connectors/tests/test_action.py b/rllib/connectors/tests/test_action.py index 8e1fc65af43d..92da301214d3 100644 --- a/rllib/connectors/tests/test_action.py +++ b/rllib/connectors/tests/test_action.py @@ -1,3 +1,5 @@ +# @OldAPIStack + import unittest import gymnasium as gym diff --git a/rllib/connectors/tests/test_agent.py b/rllib/connectors/tests/test_agent.py index 6deb2dc29077..cc1acab22588 100644 --- a/rllib/connectors/tests/test_agent.py +++ b/rllib/connectors/tests/test_agent.py @@ -1,3 +1,5 @@ +# @OldAPIStack + import gymnasium as gym from gymnasium.spaces import Box import numpy as np @@ -274,6 +276,7 @@ def test_vr_connector_respects_training_or_inference_vr_flags(self): data = AgentConnectorDataType(0, 1, agent_data) config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rq_dict, config=config, @@ -300,8 +303,6 @@ def test_vr_connector_respects_training_or_inference_vr_flags(self): check(sample_batch, sample_batch_expected) def test_vr_connector_shift_by_one(self): - """Test that the ViewRequirementAgentConnector can handle shift by one correctly and - can ignore future referencing view_requirements to respect causality""" view_rq_dict = { "state": ViewRequirement("obs"), "next_state": ViewRequirement( @@ -312,6 +313,7 @@ def test_vr_connector_shift_by_one(self): obs_arrs = np.arange(10)[:, None] + 1 config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rq_dict, config=config, is_policy_recurrent=True ) @@ -347,6 +349,7 @@ def test_vr_connector_causal_slice(self): obs_arrs = np.arange(10)[:, None] + 1 config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rq_dict, config=config, is_policy_recurrent=True ) @@ -419,6 +422,7 @@ def test_vr_connector_with_multiple_buffers(self): act_arrs = (np.arange(10)[:, None] + 1) * 100 n_steps = obs_arrs.shape[0] config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rq_dict, config=config, is_policy_recurrent=True ) @@ -460,9 +464,12 @@ def test_vr_connector_with_multiple_buffers(self): def test_connector_pipline_with_view_requirement(self): """A very minimal test that checks wheter pipeline connectors work in a simulation rollout.""" - # TODO: make this test beefier and more comprehensive config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .framework("torch") .environment(env="CartPole-v1") .env_runners(create_env_on_local_worker=True) @@ -560,6 +567,7 @@ def 
test_vr_connector_only_keeps_useful_timesteps(self): } config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rqs, config=config, @@ -594,6 +602,7 @@ def test_vr_connector_default_agent_collector_is_empty(self): } config = PPOConfig().to_dict() + config["_enable_new_api_stack"] = False ctx = ConnectorContext( view_requirements=view_rqs, config=config, diff --git a/rllib/connectors/tests/test_connector.py b/rllib/connectors/tests/test_connector.py index 1226bd9ff7ef..2d1e5a18855c 100644 --- a/rllib/connectors/tests/test_connector.py +++ b/rllib/connectors/tests/test_connector.py @@ -1,3 +1,5 @@ +# @OldAPIStack + import unittest import gymnasium as gym diff --git a/rllib/core/learner/learner.py b/rllib/core/learner/learner.py index 537a48417705..e2ce4db2f17c 100644 --- a/rllib/core/learner/learner.py +++ b/rllib/core/learner/learner.py @@ -280,16 +280,15 @@ def build(self) -> None: return # Build learner connector pipeline used on this Learner worker. - if self.config.enable_env_runner_and_connector_v2: - # TODO (sven): Figure out which space to provide here. For now, - # it doesn't matter, as the default connector piece doesn't use - # this information anyway. - # module_spec = self._module_spec.as_multi_rl_module_spec() - self._learner_connector = self.config.build_learner_connector( - input_observation_space=None, - input_action_space=None, - device=self._device, - ) + # TODO (sven): Figure out which space to provide here. For now, + # it doesn't matter, as the default connector piece doesn't use + # this information anyway. + # module_spec = self._module_spec.as_multi_rl_module_spec() + self._learner_connector = self.config.build_learner_connector( + input_observation_space=None, + input_action_space=None, + device=self._device, + ) # Build the module to be trained by this learner. self._module = self._make_module() @@ -1306,7 +1305,7 @@ def _update_from_batch_or_episodes( episodes = tree.flatten(episodes) # Call the learner connector. - if self._learner_connector is not None and episodes is not None: + if episodes is not None: # Call the learner connector pipeline. with self.metrics.log_time((ALL_MODULES, LEARNER_CONNECTOR_TIMER)): shared_data = {} @@ -1336,6 +1335,15 @@ def _update_from_batch_or_episodes( {next(iter(self.module.keys())): batch}, env_steps=len(batch) ) + # TODO (sven): Remove this leftover hack here for the situation in which we + # did not go through the learner connector. + # Options: + # a) Either also pass given batches through the learner connector (even if + # episodes is None). (preferred solution) + # b) Get rid of the option to pass in a batch altogether. + if episodes is None: + batch = self._convert_batch_type(batch) + # Check the MultiAgentBatch, whether our RLModule contains all ModuleIDs # found in this batch. If not, throw an error. unknown_module_ids = set(batch.policy_batches.keys()) - set(self.module.keys()) @@ -1375,11 +1383,6 @@ def _update_from_batch_or_episodes( # `minibatch_size` and `num_epochs` are not set by the user. batch_iter = MiniBatchDummyIterator - # Convert input batch into a tensor batch (MultiAgentBatch) on the correct - # device (e.g. GPU). We move the batch already here to avoid having to move - # every single minibatch that is created in the `batch_iter` below. 
- if self._learner_connector is None: - batch = self._convert_batch_type(batch) batch = self._set_slicing_by_batch_id(batch, value=True) for tensor_minibatch in batch_iter( diff --git a/rllib/core/learner/tests/test_learner.py b/rllib/core/learner/tests/test_learner.py index de8e700629eb..884d0b60faf6 100644 --- a/rllib/core/learner/tests/test_learner.py +++ b/rllib/core/learner/tests/test_learner.py @@ -37,8 +37,9 @@ def test_end_to_end_update(self): min_loss = float("inf") for iter_i in range(1000): - batch = reader.next() - results = learner.update_from_batch(batch=batch.as_multi_agent()) + batch = reader.next().as_multi_agent() + batch = learner._convert_batch_type(batch) + results = learner.update_from_batch(batch=batch) loss = results[DEFAULT_MODULE_ID][Learner.TOTAL_LOSS_KEY].peek() min_loss = min(loss, min_loss) diff --git a/rllib/core/learner/tests/test_learner_group.py b/rllib/core/learner/tests/test_learner_group.py index 98300ade03df..71b6ff904619 100644 --- a/rllib/core/learner/tests/test_learner_group.py +++ b/rllib/core/learner/tests/test_learner_group.py @@ -28,6 +28,7 @@ from ray.rllib.utils.test_utils import check, get_cartpole_dataset_reader from ray.rllib.utils.metrics import ALL_MODULES from ray.rllib.utils.metrics.metrics_logger import MetricsLogger +from ray.rllib.utils.torch_utils import convert_to_torch_tensor from ray.util.timer import _Timer @@ -48,130 +49,6 @@ } -# TODO(avnishn) Make this a ray task later. Currently thats not possible because the -# task is not dying after the test is done. This is a bug with ray core. -@ray.remote(num_gpus=1) -class RemoteTrainingHelper: - def local_training_helper(self, fw, scaling_mode) -> None: - if fw == "torch": - import torch - - torch.manual_seed(0) - else: - raise NotImplementedError - - env = gym.make("CartPole-v1") - - reader = get_cartpole_dataset_reader(batch_size=500) - batch = reader.next().as_multi_agent() - - config_overrides = LOCAL_CONFIGS[scaling_mode] - config = BaseTestingAlgorithmConfig().update_from_dict(config_overrides) - - learner_group = config.build_learner_group(env=env) - local_learner = config.build_learner(env=env) - - # Make the state of the learner and the local learner_group identical. - local_learner.set_state(learner_group.get_state()[COMPONENT_LEARNER]) - check(local_learner.get_state(), learner_group.get_state()[COMPONENT_LEARNER]) - - # Update and check state again. - learner_update = local_learner.update_from_batch(batch=batch) - learner_update = MetricsLogger.peek_results(learner_update) - learner_group_update = learner_group.update_from_batch(batch=batch) - check(learner_update, learner_group_update) - check(local_learner.get_state(), learner_group.get_state()[COMPONENT_LEARNER]) - - new_module_id = "test_module" - - add_module_to_learner_or_learner_group( - config, env, new_module_id, learner_group - ) - add_module_to_learner_or_learner_group( - config, env, new_module_id, local_learner - ) - - # make the state of the learner and the local learner_group identical - local_learner.set_state(learner_group.get_state()[COMPONENT_LEARNER]) - check(local_learner.get_state(), learner_group.get_state()[COMPONENT_LEARNER]) - - # Do another update. - batch = reader.next() - ma_batch = MultiAgentBatch( - {new_module_id: batch, DEFAULT_MODULE_ID: batch}, env_steps=batch.count - ) - # the optimizer state is not initialized fully until the first time that - # training is completed. A call to get state before that won't contain the - # optimizer state. 
So we do a dummy update here to initialize the optimizer - l0 = local_learner.get_state() - local_learner.update_from_batch(batch=ma_batch) - l1 = local_learner.get_state() - check( - l0["rl_module"]["default_policy"]["policy.0.bias"], - l1["rl_module"]["default_policy"]["policy.0.bias"], - false=True, - ) - check( - l0["rl_module"]["test_module"]["policy.0.bias"], - l1["rl_module"]["test_module"]["policy.0.bias"], - false=True, - ) - check( - l0["optimizer"]["default_policy_default_optimizer"]["state"][0]["exp_avg"], - l1["optimizer"]["default_policy_default_optimizer"]["state"][0]["exp_avg"], - false=True, - ) - check( - l0["optimizer"]["test_module_default_optimizer"]["state"], - {}, - ) - - lg0 = learner_group.get_state()[COMPONENT_LEARNER] - check(l0, lg0) - - learner_group.update_from_batch(batch=ma_batch) - lg1 = learner_group.get_state()[COMPONENT_LEARNER] - - check( - lg0["rl_module"]["default_policy"]["policy.0.bias"], - lg1["rl_module"]["default_policy"]["policy.0.bias"], - false=True, - ) - check( - lg0["rl_module"]["test_module"]["policy.0.bias"], - lg1["rl_module"]["test_module"]["policy.0.bias"], - false=True, - ) - check( - lg0["optimizer"]["default_policy_default_optimizer"]["state"][0]["exp_avg"], - lg1["optimizer"]["default_policy_default_optimizer"]["state"][0]["exp_avg"], - false=True, - ) - check( - lg0["optimizer"]["test_module_default_optimizer"]["state"], - {}, - ) - - check(l1["rl_module"]["test_module"], lg1["rl_module"]["test_module"]) - check( - l1["optimizer"]["test_module_default_optimizer"], - lg1["optimizer"]["test_module_default_optimizer"], - ) - # check(l1["rl_module"]["default_policy"], lg1["rl_module"]["default_policy"]) - - # local_learner.update_from_batch(batch=ma_batch) - # learner_group.update_from_batch(batch=ma_batch) - - # check(local_learner.get_state(), learner_group.get_state()[COMPONENT_LEARNER]) - # local_learner_results = local_learner.update_from_batch(batch=ma_batch) - # local_learner_results = MetricsLogger.peek_results(local_learner_results) - # learner_group_results = learner_group.update_from_batch(batch=ma_batch) - - # check(local_learner_results, learner_group_results) - - # check(local_learner.get_state(), learner_group.get_state()[COMPONENT_LEARNER]) - - class TestLearnerGroupSyncUpdate(unittest.TestCase): @classmethod def setUpClass(cls) -> None: @@ -207,20 +84,6 @@ def test_learner_group_build_from_algorithm_config(self): print(learner_group) learner_group.shutdown() - # def test_learner_group_local(self): - # fws = ["torch"] - - # test_iterator = itertools.product(fws, LOCAL_CONFIGS) - - # # run the logic of this test inside of a ray actor because we want tensorflow - # # resources to be gracefully released. Tensorflow blocks the gpu resources - # # otherwise between test cases, causing a gpu oom error. 
- # for fw, scaling_mode in test_iterator: - # print(f"Testing framework: {fw}, scaling_mode: {scaling_mode}") - # training_helper = RemoteTrainingHelper.remote() - # ray.get(training_helper.local_training_helper.remote(fw, scaling_mode)) - # del training_helper - def test_update_multi_gpu(self): return @@ -239,8 +102,8 @@ def test_update_multi_gpu(self): min_loss = float("inf") for iter_i in range(1000): - batch = reader.next() - results = learner_group.update_from_batch(batch=batch.as_multi_agent()) + batch = convert_to_torch_tensor(reader.next().as_multi_agent()) + results = learner_group.update_from_batch(batch=batch) loss = np.mean( [res[ALL_MODULES][Learner.TOTAL_LOSS_KEY] for res in results] @@ -279,7 +142,7 @@ def test_add_module_and_remove_module(self): config = BaseTestingAlgorithmConfig().update_from_dict(config_overrides) learner_group = config.build_learner_group(env=env) reader = get_cartpole_dataset_reader(batch_size=512) - batch = reader.next() + batch = convert_to_torch_tensor(reader.next()) # Update once with the default policy. learner_group.update_from_batch(batch.as_multi_agent()) @@ -451,7 +314,8 @@ def test_save_to_path_and_restore_from_path(self): # this is expanded to more scaling modes on the release ci. scaling_modes = ["local-cpu", "multi-gpu-ddp"] test_iterator = itertools.product(fws, scaling_modes) - batch = SampleBatch(FAKE_BATCH) + batch = SampleBatch(convert_to_torch_tensor(FAKE_BATCH)).as_multi_agent() + for fw, scaling_mode in test_iterator: print(f"Testing framework: {fw}, scaling mode: {scaling_mode}.") env = gym.make("CartPole-v1") @@ -469,7 +333,7 @@ def test_save_to_path_and_restore_from_path(self): initial_weights = learner_group.get_weights() # Do a single update. - learner_group.update_from_batch(batch.as_multi_agent()) + learner_group.update_from_batch(batch) weights_after_update = learner_group.get_state( components=COMPONENT_LEARNER + "/" + COMPONENT_RL_MODULE )[COMPONENT_LEARNER][COMPONENT_RL_MODULE] @@ -490,9 +354,7 @@ def test_save_to_path_and_restore_from_path(self): learner_group.restore_from_path(learner_after_1_update_checkpoint_dir) # Do another update. - results_2nd_update_with_break = learner_group.update_from_batch( - batch=batch.as_multi_agent() - ) + results_2nd_update_with_break = learner_group.update_from_batch(batch=batch) weights_after_2_updates_with_break = learner_group.get_state( components=COMPONENT_LEARNER + "/" + COMPONENT_RL_MODULE )[COMPONENT_LEARNER][COMPONENT_RL_MODULE] @@ -509,10 +371,8 @@ def test_save_to_path_and_restore_from_path(self): weights_after_restore.pop(COMPONENT_MULTI_RL_MODULE_SPEC) check(initial_weights, weights_after_restore) # Perform 2 updates to get to the same state as the previous learners. 
- learner_group.update_from_batch(batch.as_multi_agent()) - results_2nd_without_break = learner_group.update_from_batch( - batch=batch.as_multi_agent() - ) + learner_group.update_from_batch(batch) + results_2nd_without_break = learner_group.update_from_batch(batch=batch) weights_after_2_updates_without_break = learner_group.get_weights() learner_group.shutdown() del learner_group diff --git a/rllib/core/models/tests/test_catalog.py b/rllib/core/models/tests/test_catalog.py index f745f8d570a7..d201c60d5ab5 100644 --- a/rllib/core/models/tests/test_catalog.py +++ b/rllib/core/models/tests/test_catalog.py @@ -322,10 +322,6 @@ def build_vf_head(self, framework): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .rl_module( rl_module_spec=RLModuleSpec(catalog_class=MyCatalog), ) diff --git a/rllib/core/rl_module/default_model_config.py b/rllib/core/rl_module/default_model_config.py index ecd9e4b9b906..3a8e1da15a76 100644 --- a/rllib/core/rl_module/default_model_config.py +++ b/rllib/core/rl_module/default_model_config.py @@ -44,8 +44,8 @@ class DefaultModelConfig: from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig config = ( - PPOConfig(). - rl_module( + PPOConfig() + .rl_module( model_config=DefaultModelConfig(fcnet_hiddens=[32, 32]), ) ) diff --git a/rllib/core/rl_module/rl_module.py b/rllib/core/rl_module/rl_module.py index 42aa0a780ed4..d429eb7f7bca 100644 --- a/rllib/core/rl_module/rl_module.py +++ b/rllib/core/rl_module/rl_module.py @@ -1,6 +1,7 @@ import abc import dataclasses from dataclasses import dataclass, field +import logging from typing import Any, Collection, Dict, Optional, Type, TYPE_CHECKING, Union import gymnasium as gym @@ -36,6 +37,8 @@ ) from ray.rllib.core.models.catalog import Catalog +logger = logging.getLogger("ray.rllib") + @PublicAPI(stability="alpha") @dataclass @@ -395,6 +398,7 @@ def __init__( # TODO (sven): Deprecate Catalog and replace with utility functions to create # primitive components based on obs- and action spaces. self.catalog = None + self._catalog_ctor_error = None # Deprecated self.config = config @@ -403,17 +407,10 @@ def __init__( old="RLModule(config=[RLModuleConfig])", new="RLModule(observation_space=.., action_space=.., inference_only=..," " learner_only=.., model_config=..)", - error=False, + help="See https://github.com/ray-project/ray/blob/master/rllib/examples/rl_modules/custom_cnn_rl_module.py " # noqa + "for how to write a custom RLModule.", + error=True, ) - self.observation_space = self.config.observation_space - self.action_space = self.config.action_space - self.inference_only = self.config.inference_only - self.learner_only = self.config.learner_only - self.model_config = self.config.model_config_dict - try: - self.catalog = self.config.get_catalog() - except Exception: - pass else: self.observation_space = observation_space self.action_space = action_space @@ -426,8 +423,16 @@ def __init__( action_space=self.action_space, model_config_dict=self.model_config, ) - except Exception: - pass + except Exception as e: + logger.warning( + "Could not create a Catalog object for your RLModule! If you are " + "not using the new API stack yet, make sure to switch it off in " + "your config: `config.api_stack(enable_rl_module_and_learner=False" + ", enable_env_runner_and_connector_v2=False)`. Some algos already " + "use the new stack by default. Ignore this message, if your " + "RLModule does not use a Catalog to build its sub-components." 
+ ) + self._catalog_ctor_error = e # TODO (sven): Deprecate this. We keep it here for now in case users # still have custom models (or subclasses of RLlib default models) @@ -693,7 +698,7 @@ def set_state(self, state: StateDict) -> None: @override(Checkpointable) def get_ctor_args_and_kwargs(self): return ( - (self.config,), # *args + (), # *args { "observation_space": self.observation_space, "action_space": self.action_space, diff --git a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py index 9e1c43faa836..2b3a7bb0a9e5 100644 --- a/rllib/core/rl_module/tf/tests/test_tf_rl_module.py +++ b/rllib/core/rl_module/tf/tests/test_tf_rl_module.py @@ -5,7 +5,6 @@ import tensorflow as tf from ray.rllib.core.columns import Columns -from ray.rllib.core.rl_module.rl_module import RLModuleConfig from ray.rllib.core.rl_module.tf.tf_rl_module import TfRLModule from ray.rllib.core.testing.tf.bc_module import DiscreteBCTFModule from ray.rllib.utils.test_utils import check @@ -16,11 +15,9 @@ def test_compilation(self): env = gym.make("CartPole-v1") module = DiscreteBCTFModule( - config=RLModuleConfig( - env.observation_space, - env.action_space, - model_config_dict={"fcnet_hiddens": [32]}, - ) + observation_space=env.observation_space, + action_space=env.action_space, + model_config={"fcnet_hiddens": [32]}, ) self.assertIsInstance(module, TfRLModule) @@ -63,11 +60,9 @@ def test_forward(self): env = gym.make("CartPole-v1") module = DiscreteBCTFModule( - config=RLModuleConfig( - env.observation_space, - env.action_space, - model_config_dict={"fcnet_hiddens": [32]}, - ) + observation_space=env.observation_space, + action_space=env.action_space, + model_config={"fcnet_hiddens": [32]}, ) obs_shape = env.observation_space.shape @@ -81,22 +76,18 @@ def test_get_set_state(self): env = gym.make("CartPole-v1") module = DiscreteBCTFModule( - config=RLModuleConfig( - env.observation_space, - env.action_space, - model_config_dict={"fcnet_hiddens": [32]}, - ) + observation_space=env.observation_space, + action_space=env.action_space, + model_config={"fcnet_hiddens": [32]}, ) state = module.get_state() self.assertIsInstance(state, dict) module2 = DiscreteBCTFModule( - config=RLModuleConfig( - env.observation_space, - env.action_space, - model_config_dict={"fcnet_hiddens": [32]}, - ) + observation_space=env.observation_space, + action_space=env.action_space, + model_config={"fcnet_hiddens": [32]}, ) state2 = module2.get_state() check(state["policy"][0], state2["policy"][0], false=True) diff --git a/rllib/env/policy_server_input.py b/rllib/env/policy_server_input.py index c2e5e75aebf5..eedbe224e631 100644 --- a/rllib/env/policy_server_input.py +++ b/rllib/env/policy_server_input.py @@ -49,6 +49,10 @@ class PolicyServerInput(ThreadingMixIn, HTTPServer, InputReader): addr, port = ... 
config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .offline_data( input_=lambda ioctx: PolicyServerInput(ioctx, addr, port) diff --git a/rllib/env/tests/test_multi_agent_env.py b/rllib/env/tests/test_multi_agent_env.py index 98caf12c57fa..31d4c9ea13cc 100644 --- a/rllib/env/tests/test_multi_agent_env.py +++ b/rllib/env/tests/test_multi_agent_env.py @@ -678,6 +678,10 @@ def test_multi_agent_with_flex_agents(self): register_env("flex_agents_multi_agent", lambda _: FlexAgentsMultiAgent()) config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("flex_agents_multi_agent") .env_runners(num_env_runners=0) .training(train_batch_size=50, minibatch_size=50, num_epochs=1) @@ -700,6 +704,10 @@ def test_multi_agent_with_sometimes_zero_agents_observing(self): ) config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("sometimes_zero_agents") .env_runners(num_env_runners=0) ) diff --git a/rllib/env/tests/test_multi_agent_env_runner.py b/rllib/env/tests/test_multi_agent_env_runner.py index 26136fb37aca..acfaa647bd70 100644 --- a/rllib/env/tests/test_multi_agent_env_runner.py +++ b/rllib/env/tests/test_multi_agent_env_runner.py @@ -94,12 +94,7 @@ def test_sample_episodes(self): def _build_config(self): # Build the configuration and use `PPO`. config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .environment( + PPOConfig().environment( MultiAgentCartPole, env_config={"num_agents": 2}, ) diff --git a/rllib/evaluation/tests/test_env_runner_v2.py b/rllib/evaluation/tests/test_env_runner_v2.py index d5d139f385a7..05f05c495961 100644 --- a/rllib/evaluation/tests/test_env_runner_v2.py +++ b/rllib/evaluation/tests/test_env_runner_v2.py @@ -52,6 +52,10 @@ def tearDownClass(cls): def test_sample_batch_rollout_single_agent_env(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment(DebugCounterEnv) .framework("torch") .training( @@ -77,6 +81,10 @@ def test_sample_batch_rollout_single_agent_env(self): def test_sample_batch_rollout_multi_agent_env(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("basic_multiagent") .framework("torch") .training( @@ -144,6 +152,10 @@ def compute_actions( config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .framework("torch") .environment("env_under_test") .env_runners( @@ -205,6 +217,10 @@ def __init__(self, *args, **kwargs): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("basic_multiagent") .framework("torch") .training( @@ -274,6 +290,10 @@ def on_create_policy(self, *, policy_id, policy) -> None: config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("basic_multiagent") .framework("torch") .training( @@ -298,6 +318,10 @@ def on_create_policy(self, *, policy_id, policy) -> None: def test_start_episode(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) 
.environment("basic_multiagent") .framework("torch") .training( @@ -352,6 +376,10 @@ def test_env_runner_output(self): # Test if we can produce RolloutMetrics just by stepping config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("basic_multiagent") .framework("torch") .training( @@ -409,6 +437,10 @@ def on_episode_end( # Test if we can produce RolloutMetrics just by stepping config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("basic_multiagent") .framework("torch") .training( diff --git a/rllib/evaluation/tests/test_rollout_worker.py b/rllib/evaluation/tests/test_rollout_worker.py index d52529d1e632..f371ba942d5f 100644 --- a/rllib/evaluation/tests/test_rollout_worker.py +++ b/rllib/evaluation/tests/test_rollout_worker.py @@ -172,6 +172,10 @@ def test_batch_ids(self): def test_global_vars_update(self): config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("CartPole-v1") .env_runners(num_envs_per_env_runner=1) # lr = 0.1 - [(0.1 - 0.000001) / 100000] * ts @@ -202,6 +206,10 @@ def test_query_evaluators(self): register_env("test", lambda _: gym.make("CartPole-v1")) config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment("test") .env_runners( num_env_runners=2, diff --git a/rllib/examples/_docs/rllib_on_rllib_readme.py b/rllib/examples/_docs/rllib_on_rllib_readme.py index 5e1090153dfc..4463eba4ce85 100644 --- a/rllib/examples/_docs/rllib_on_rllib_readme.py +++ b/rllib/examples/_docs/rllib_on_rllib_readme.py @@ -1,4 +1,7 @@ import gymnasium as gym +import numpy as np +import torch + from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, @@ -21,7 +24,7 @@ class ParrotEnv(gym.Env): def __init__(self, config): # Make the space (for actions and observations) configurable. self.action_space = config.get( - "parrot_shriek_range", gym.spaces.Box(-1.0, 1.0, shape=(1,)) + "parrot_shriek_range", gym.spaces.Box(-1.0, 1.0, (1,), np.float32) ) # Since actions should repeat observations, their spaces must be the # same. @@ -45,12 +48,12 @@ def step(self, action): """ # Set `done` and `truncated` flags after 10 steps. self.episode_len += 1 - done = truncated = self.episode_len >= 10 + terminated = truncated = self.episode_len >= 10 # r = -abs(obs - action) reward = -sum(abs(self.cur_obs - action)) # Set a new observation (random sample). self.cur_obs = self.observation_space.sample() - return self.cur_obs, reward, done, truncated, {} + return self.cur_obs, reward, terminated, truncated, {} # Create an RLlib Algorithm instance from a PPOConfig to learn how to @@ -88,7 +91,10 @@ def step(self, action): while not done: # Compute a single action, given the current observation # from the environment. - action = algo.compute_single_action(obs) + model_outputs = algo.env_runner.module.forward_inference( + {"obs": torch.from_numpy(obs)} + ) + action = model_outputs["action_dist_inputs"][0].numpy() # Apply the computed action in the environment. obs, reward, done, truncated, info = env.step(action) # Sum up rewards for reporting purposes. 
diff --git a/rllib/examples/autoregressive_action_dist.py b/rllib/examples/autoregressive_action_dist.py index 5dfac509e580..241b6a19429d 100644 --- a/rllib/examples/autoregressive_action_dist.py +++ b/rllib/examples/autoregressive_action_dist.py @@ -148,7 +148,10 @@ def get_cli_args(): get_trainable_cls(args.run) .get_default_config() # Batch-norm models have not been migrated to the RL Module API yet. - .api_stack(enable_rl_module_and_learner=False) + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment(AutoRegressiveActionEnv) .framework(args.framework) .training(gamma=0.5) diff --git a/rllib/examples/catalogs/mobilenet_v2_encoder.py b/rllib/examples/catalogs/mobilenet_v2_encoder.py index 93d85bcd7633..7b76fb227f2c 100644 --- a/rllib/examples/catalogs/mobilenet_v2_encoder.py +++ b/rllib/examples/catalogs/mobilenet_v2_encoder.py @@ -44,10 +44,6 @@ def _get_encoder_config( # Create a generic config with our enhanced Catalog ppo_config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .rl_module(rl_module_spec=RLModuleSpec(catalog_class=MobileNetEnhancedPPOCatalog)) .env_runners(num_env_runners=0) # The following training settings make it so that a training iteration is very diff --git a/rllib/examples/centralized_critic.py b/rllib/examples/centralized_critic.py index 0cbe110810cf..01b274e92477 100644 --- a/rllib/examples/centralized_critic.py +++ b/rllib/examples/centralized_critic.py @@ -269,6 +269,10 @@ def get_default_policy_class(cls, config): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment(TwoStepGame) .framework(args.framework) .env_runners(batch_mode="complete_episodes", num_env_runners=0) diff --git a/rllib/examples/checkpoints/cartpole_dqn_export.py b/rllib/examples/checkpoints/cartpole_dqn_export.py index 48e73f15b6ae..86a623d012d9 100644 --- a/rllib/examples/checkpoints/cartpole_dqn_export.py +++ b/rllib/examples/checkpoints/cartpole_dqn_export.py @@ -18,6 +18,10 @@ def train_and_export_policy_and_model(algo_name, num_steps, model_dir, ckpt_dir): cls = get_trainable_cls(algo_name) config = cls.get_default_config() + config.api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) # This Example is only for tf. config.framework("tf") # Set exporting native (DL-framework) model files to True. diff --git a/rllib/examples/checkpoints/onnx_tf.py b/rllib/examples/checkpoints/onnx_tf.py index 65d83fb095c3..19fb7f376032 100644 --- a/rllib/examples/checkpoints/onnx_tf.py +++ b/rllib/examples/checkpoints/onnx_tf.py @@ -23,7 +23,15 @@ args = parser.parse_args() # Configure our PPO Algorithm. - config = ppo.PPOConfig().env_runners(num_env_runners=1).framework(args.framework) + config = ( + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .env_runners(num_env_runners=1) + .framework(args.framework) + ) outdir = "export_tf" if os.path.exists(outdir): diff --git a/rllib/examples/checkpoints/onnx_torch.py b/rllib/examples/checkpoints/onnx_torch.py index f718fffb7c8a..b7d39cc9225a 100644 --- a/rllib/examples/checkpoints/onnx_torch.py +++ b/rllib/examples/checkpoints/onnx_torch.py @@ -11,7 +11,15 @@ if __name__ == "__main__": # Configure our PPO Algorithm. 
- config = ppo.PPOConfig().env_runners(num_env_runners=1).framework("torch") + config = ( + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .env_runners(num_env_runners=1) + .framework("torch") + ) outdir = "export_torch" if os.path.exists(outdir): diff --git a/rllib/examples/custom_recurrent_rnn_tokenizer.py b/rllib/examples/custom_recurrent_rnn_tokenizer.py deleted file mode 100644 index fd7bab9edab5..000000000000 --- a/rllib/examples/custom_recurrent_rnn_tokenizer.py +++ /dev/null @@ -1,188 +0,0 @@ -# @OldAPIStack - -"""Example of defining custom tokenizers for recurrent models in RLModules. - -This example shows the following steps: -- Define a custom tokenizer for a recurrent encoder. -- Define a model config that builds the custom tokenizer. -- Modify the default PPOCatalog to use the custom tokenizer config. -- Run a training that uses the custom tokenizer. -""" - -import argparse -import os - -import ray -from ray import air, tune -from ray.air.constants import TRAINING_ITERATION -from ray.tune.registry import register_env -from ray.rllib.examples.envs.classes.repeat_after_me_env import RepeatAfterMeEnv -from ray.rllib.examples.envs.classes.repeat_initial_obs_env import RepeatInitialObsEnv -from ray.rllib.core.rl_module.rl_module import RLModuleSpec -from ray.rllib.policy.sample_batch import SampleBatch -from dataclasses import dataclass -from ray.rllib.core.models.base import Encoder, ENCODER_OUT -from ray.rllib.core.models.torch.base import TorchModel -from ray.rllib.core.models.tf.base import TfModel -from ray.rllib.algorithms.ppo.ppo import PPOConfig -from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog -from ray.rllib.utils.framework import try_import_tf, try_import_torch -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MEAN, - NUM_ENV_STEPS_SAMPLED_LIFETIME, -) -from ray.rllib.utils.test_utils import check_learning_achieved -from ray.rllib.core.models.configs import ModelConfig - -parser = argparse.ArgumentParser() - -tf1, tf, tfv = try_import_tf() -torch, nn = try_import_torch() - -parser.add_argument("--env", type=str, default="RepeatAfterMeEnv") -parser.add_argument("--num-cpus", type=int, default=0) -parser.add_argument( - "--framework", - choices=["tf", "tf2", "torch"], - default="torch", - help="The DL framework specifier.", -) -parser.add_argument( - "--as-test", - action="store_true", - help="Whether this script should be run as a test: --stop-reward must " - "be achieved within --stop-timesteps AND --stop-iters.", -) -parser.add_argument( - "--stop-iters", type=int, default=100, help="Number of iterations to train." -) -parser.add_argument( - "--stop-timesteps", type=int, default=100000, help="Number of timesteps to train." -) -parser.add_argument( - "--stop-reward", type=float, default=90.0, help="Reward at which we stop training." -) -parser.add_argument( - "--local-mode", - action="store_true", - help="Init Ray in local mode for easier debugging.", -) - -# We first define a custom tokenizer that we want to use to encode the -# observations before they are passed into the recurrent cells. -# We do this step for tf and for torch here to make the following steps framework- -# agnostic. However, if you use only one framework, you can skip the other one. 
- - -class CustomTorchTokenizer(TorchModel, Encoder): - def __init__(self, config) -> None: - TorchModel.__init__(self, config) - Encoder.__init__(self, config) - self.net = nn.Sequential( - nn.Linear(config.input_dims[0], config.output_dims[0]), - ) - - def _forward(self, inputs: dict, **kwargs): - return {ENCODER_OUT: self.net(inputs[SampleBatch.OBS])} - - -class CustomTfTokenizer(TfModel, Encoder): - def __init__(self, config) -> None: - TfModel.__init__(self, config) - Encoder.__init__(self, config) - - self.net = tf.keras.models.Sequential( - [ - tf.keras.layers.Input(shape=config.input_dims), - tf.keras.layers.Dense(config.output_dims[0], activation="relu"), - ] - ) - - def _forward(self, inputs: dict, **kwargs): - return {ENCODER_OUT: self.net(inputs[SampleBatch.OBS])} - - -# Since RLlib decides during runtime which framework we use, we need to provide a -# model config that is buildable depending on the framework. The recurrent models -# will consume this config during runtime and build our custom tokenizer accordingly. - - -@dataclass -class CustomTokenizerConfig(ModelConfig): - output_dims: tuple = None - - def build(self, framework): - if framework == "torch": - return CustomTorchTokenizer(self) - else: - return CustomTfTokenizer(self) - - -# We now modify the default Catalog for PPO to inject our config. -# Alternatively, we could inherit from the PPO RLModule here, which is more -# straightforward if we want to completely replace -# the default models. However, we want to keep RLlib's default LSTM Encoder and only -# place our tokenizer inside of it, so we use the Catalog here for demonstration -# purposes. - - -class CustomPPOCatalog(PPOCatalog): - # Note that RLlib expects this to be a classmethod. - @classmethod - def get_tokenizer_config( - cls, - observation_space, - model_config_dict, - view_requirements=None, - ) -> ModelConfig: - return CustomTokenizerConfig( - input_dims=observation_space.shape, - output_dims=(64,), - ) - - -if __name__ == "__main__": - args = parser.parse_args() - - ray.init(num_cpus=args.num_cpus or None, local_mode=args.local_mode) - register_env("RepeatAfterMeEnv", lambda c: RepeatAfterMeEnv(c)) - register_env("RepeatInitialObsEnv", lambda _: RepeatInitialObsEnv()) - - config = ( - PPOConfig() - .environment(args.env, env_config={"repeat_delay": 2}) - .framework(args.framework) - .env_runners(num_env_runners=0, num_envs_per_env_runner=20) - .training( - model={ - "vf_share_layers": False, - "use_lstm": True, - "lstm_cell_size": 256, - "fcnet_hiddens": [256], - }, - gamma=0.9, - entropy_coeff=0.001, - vf_loss_coeff=1e-5, - ) - .rl_module(rl_module_spec=RLModuleSpec(catalog_class=CustomPPOCatalog)) - # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 
- .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))) - ) - - stop = { - TRAINING_ITERATION: args.stop_iters, - NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps, - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": args.stop_reward, - } - - tuner = tune.Tuner( - "PPO", - param_space=config.to_dict(), - run_config=air.RunConfig(stop=stop, verbose=1), - ) - results = tuner.fit() - - if args.as_test: - check_learning_achieved(results, args.stop_reward) - ray.shutdown() diff --git a/rllib/examples/debugging/deterministic_training.py b/rllib/examples/debugging/deterministic_training.py index 5ef6ee1a0167..9e7a8960c56e 100644 --- a/rllib/examples/debugging/deterministic_training.py +++ b/rllib/examples/debugging/deterministic_training.py @@ -36,6 +36,10 @@ config = ( get_trainable_cls(args.run) .get_default_config() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .environment( CartPoleWithRemoteParamServer, env_config={"param_server": "param-server"}, diff --git a/rllib/examples/hierarchical/hierarchical_training.py b/rllib/examples/hierarchical/hierarchical_training.py index 924aa5de2f07..ccdc067fe3ae 100644 --- a/rllib/examples/hierarchical/hierarchical_training.py +++ b/rllib/examples/hierarchical/hierarchical_training.py @@ -91,6 +91,10 @@ run_config=air.RunConfig(stop=stop), param_space=( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment(WindyMazeEnv) .env_runners(num_env_runners=0) .framework(args.framework) @@ -107,6 +111,10 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment(HierarchicalWindyMazeEnv) .framework(args.framework) .env_runners(num_env_runners=0) diff --git a/rllib/examples/inference/policy_inference_after_training.py b/rllib/examples/inference/policy_inference_after_training.py index b462263a5dab..4ece833c3c53 100644 --- a/rllib/examples/inference/policy_inference_after_training.py +++ b/rllib/examples/inference/policy_inference_after_training.py @@ -138,7 +138,7 @@ ) # Create new RLModule and restore its state from the last algo checkpoint. # Note that the checkpoint for the RLModule can be found deeper inside the algo - # checkpoint's sub-directories ([algo dir] -> "learner/" -> "module_state/" -> + # checkpoint's subdirectories ([algo dir] -> "learner/" -> "module_state/" -> # "[module ID]): rl_module = RLModule.from_checkpoint( os.path.join( diff --git a/rllib/examples/inference/policy_inference_after_training_with_attention.py b/rllib/examples/inference/policy_inference_after_training_with_attention.py index 07779e3bc0b2..1e594066d18f 100644 --- a/rllib/examples/inference/policy_inference_after_training_with_attention.py +++ b/rllib/examples/inference/policy_inference_after_training_with_attention.py @@ -84,6 +84,10 @@ config = ( get_trainable_cls(args.run) .get_default_config() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("FrozenLake-v1") # Run with tracing enabled for tf2? 
.framework(args.framework) diff --git a/rllib/examples/inference/policy_inference_after_training_with_lstm.py b/rllib/examples/inference/policy_inference_after_training_with_lstm.py index a7dc5ada6f3c..39c6ac6aa588 100644 --- a/rllib/examples/inference/policy_inference_after_training_with_lstm.py +++ b/rllib/examples/inference/policy_inference_after_training_with_lstm.py @@ -82,6 +82,10 @@ config = ( get_trainable_cls(args.run) .get_default_config() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("FrozenLake-v1") # Run with tracing enabled for tf2? .framework(args.framework) diff --git a/rllib/examples/learners/custom_loss_fn_simple.py b/rllib/examples/learners/custom_loss_fn_simple.py index 9877fa10cddf..aa50db615977 100644 --- a/rllib/examples/learners/custom_loss_fn_simple.py +++ b/rllib/examples/learners/custom_loss_fn_simple.py @@ -112,10 +112,6 @@ class for details on how to override the main (PPO) loss function. base_config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("CartPole-v1") .training( # This is the most important setting in this script: We point our PPO diff --git a/rllib/examples/metrics/custom_metrics_in_env_runners.py b/rllib/examples/metrics/custom_metrics_in_env_runners.py index cba86a50afb6..6c69bdc5746e 100644 --- a/rllib/examples/metrics/custom_metrics_in_env_runners.py +++ b/rllib/examples/metrics/custom_metrics_in_env_runners.py @@ -2,8 +2,8 @@ We use the `MetricsLogger` class, which RLlib provides inside all its components (only when using the new API stack through -`config.api_stack(_enable_rl_module_and_learner=True, -_enable_env_runner_and_connector_v2=True)`), +`config.api_stack(enable_rl_module_and_learner=True, +enable_env_runner_and_connector_v2=True)`), and which offers a unified API to log individual values per iteration, per episode timestep, per episode (as a whole), per loss call, etc.. `MetricsLogger` objects are available in all custom API code, for example inside your diff --git a/rllib/examples/offline_rl/train_w_bc_finetune_w_ppo.py b/rllib/examples/offline_rl/train_w_bc_finetune_w_ppo.py index 348dfb2af142..979d47562cca 100644 --- a/rllib/examples/offline_rl/train_w_bc_finetune_w_ppo.py +++ b/rllib/examples/offline_rl/train_w_bc_finetune_w_ppo.py @@ -264,10 +264,6 @@ def compute_values(self, batch, embeddings=None): # Create a new PPO config. base_config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment(args.env) .training( # Keep lr relatively low at the beginning to avoid catastrophic forgetting. diff --git a/rllib/examples/ray_tune/custom_experiment.py b/rllib/examples/ray_tune/custom_experiment.py index 779c5c1fd041..66ce75c11eb6 100644 --- a/rllib/examples/ray_tune/custom_experiment.py +++ b/rllib/examples/ray_tune/custom_experiment.py @@ -153,15 +153,7 @@ def my_experiment(config: Dict): if __name__ == "__main__": - base_config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .environment("CartPole-v1") - .env_runners(num_env_runners=0) - ) + base_config = PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=0) # Convert to a plain dict for Tune. Note that this is usually not needed, you can # pass into the below Tune Tuner any instantiated RLlib AlgorithmConfig object. 
# However, for demonstration purposes, we show here how you can add other, arbitrary diff --git a/rllib/examples/ray_tune/custom_logger.py b/rllib/examples/ray_tune/custom_logger.py index 5aedacc512a5..9823e47daaec 100644 --- a/rllib/examples/ray_tune/custom_logger.py +++ b/rllib/examples/ray_tune/custom_logger.py @@ -87,12 +87,7 @@ def flush(self): if __name__ == "__main__": config = ( - PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) - .environment("CartPole-v1") + PPOConfig().environment("CartPole-v1") # Setting up a custom logger config. # ---------------------------------- # The following are different examples of custom logging setups: diff --git a/rllib/examples/ray_tune/custom_progress_reporter.py b/rllib/examples/ray_tune/custom_progress_reporter.py index 5aee72218ddf..092b0710db57 100644 --- a/rllib/examples/ray_tune/custom_progress_reporter.py +++ b/rllib/examples/ray_tune/custom_progress_reporter.py @@ -94,10 +94,6 @@ config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("env") .multi_agent( # Define 3 policies. Note that in our simple setup, they are all configured diff --git a/rllib/examples/rl_modules/action_masking_rl_module.py b/rllib/examples/rl_modules/action_masking_rl_module.py index 0fc0492a6843..a4ac85c26ac8 100644 --- a/rllib/examples/rl_modules/action_masking_rl_module.py +++ b/rllib/examples/rl_modules/action_masking_rl_module.py @@ -92,11 +92,6 @@ base_config = ( PPOConfig() - .api_stack( - # This example runs only under the new pai stack. - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment( env=ActionMaskEnv, env_config={ diff --git a/rllib/examples/rl_modules/autoregressive_actions_rl_module.py b/rllib/examples/rl_modules/autoregressive_actions_rl_module.py index 2f7e576a5bd5..af1e27146582 100644 --- a/rllib/examples/rl_modules/autoregressive_actions_rl_module.py +++ b/rllib/examples/rl_modules/autoregressive_actions_rl_module.py @@ -93,10 +93,6 @@ .env_runners( num_env_runners=0, ) - .api_stack( - enable_env_runner_and_connector_v2=True, - enable_rl_module_and_learner=True, - ) .evaluation( evaluation_num_env_runners=1, evaluation_interval=1, diff --git a/rllib/examples/rl_modules/classes/mobilenet_rlm.py b/rllib/examples/rl_modules/classes/mobilenet_rlm.py index 206bdda36bd8..8f3a86e69235 100644 --- a/rllib/examples/rl_modules/classes/mobilenet_rlm.py +++ b/rllib/examples/rl_modules/classes/mobilenet_rlm.py @@ -53,10 +53,6 @@ def setup(self): config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .rl_module(rl_module_spec=RLModuleSpec(module_class=MobileNetTorchPPORLModule)) .environment( RandomEnv, diff --git a/rllib/examples/rl_modules/custom_lstm_rl_module.py b/rllib/examples/rl_modules/custom_lstm_rl_module.py index 88a98cd5476e..85b160808bd7 100644 --- a/rllib/examples/rl_modules/custom_lstm_rl_module.py +++ b/rllib/examples/rl_modules/custom_lstm_rl_module.py @@ -63,10 +63,6 @@ if __name__ == "__main__": args = parser.parse_args() - assert ( - args.enable_new_api_stack - ), "Must set --enable-new-api-stack when running this script!" 
- if args.num_agents == 0: register_env("env", lambda cfg: StatelessCartPole()) else: diff --git a/rllib/examples/rl_modules/migrate_modelv2_to_new_api_stack_by_config.py b/rllib/examples/rl_modules/migrate_modelv2_to_new_api_stack_by_config.py index 363ea610db67..21b68184051f 100644 --- a/rllib/examples/rl_modules/migrate_modelv2_to_new_api_stack_by_config.py +++ b/rllib/examples/rl_modules/migrate_modelv2_to_new_api_stack_by_config.py @@ -12,6 +12,10 @@ # Configure an old stack default ModelV2. config_old_stack = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .training( lr=0.0003, diff --git a/rllib/execution/rollout_ops.py b/rllib/execution/rollout_ops.py index 255d0ba4ba71..7c3fd489802f 100644 --- a/rllib/execution/rollout_ops.py +++ b/rllib/execution/rollout_ops.py @@ -63,12 +63,18 @@ def synchronous_parallel_sample( # Define an RLlib Algorithm. from ray.rllib.algorithms.ppo import PPO, PPOConfig - config = PPOConfig().environment("CartPole-v1") - algorithm = PPO(config=config) - # 2 remote workers (num_workers=2): - batches = synchronous_parallel_sample(worker_set=algorithm.env_runner_group, - concat=False) - print(len(batches)) + config = ( + PPOConfig() + .environment("CartPole-v1") + ) + algorithm = config.build() + # 2 remote EnvRunners (num_env_runners=2): + episodes = synchronous_parallel_sample( + worker_set=algorithm.env_runner_group, + _uses_new_env_runners=True, + concat=False, + ) + print(len(episodes)) .. testoutput:: diff --git a/rllib/models/tests/test_attention_nets.py b/rllib/models/tests/test_attention_nets.py index bed5ad726fbc..e105955908da 100644 --- a/rllib/models/tests/test_attention_nets.py +++ b/rllib/models/tests/test_attention_nets.py @@ -18,6 +18,7 @@ class TestAttentionNets(unittest.TestCase): config = { + "_enable_new_api_stack": False, "env": StatelessCartPole, "gamma": 0.99, "num_envs_per_env_runner": 20, @@ -40,6 +41,7 @@ def tearDownClass(cls) -> None: def test_attention_nets_w_prev_actions_and_prev_rewards(self): """Tests attention prev-a/r input insertions using complex actions.""" config = { + "_enable_new_api_stack": False, "env": RandomEnv, "env_config": { "config": { @@ -110,38 +112,6 @@ def test_ppo_attention_net_learning(self): run_config=air.RunConfig(stop=self.stop, verbose=1), ).fit() - # TODO: (sven) causes memory failures/timeouts on Travis. - # Re-enable this once we have fast attention in master branch. 
- def test_impala_attention_net_learning(self): - return - # ModelCatalog.register_custom_model("attention_net", GTrXLNet) - # config = dict( - # self.config, **{ - # "num_env_runners": 4, - # "num_gpus": 0, - # "entropy_coeff": 0.01, - # "vf_loss_coeff": 0.001, - # "lr": 0.0008, - # "model": { - # "custom_model": "attention_net", - # "max_seq_len": 65, - # "custom_model_config": { - # "num_transformer_units": 1, - # "attention_dim": 64, - # "num_heads": 1, - # "memory_inference": 10, - # "memory_training": 10, - # "head_dim": 32, - # "position_wise_mlp_dim": 32, - # }, - # }, - # }) - # tune.Tuner( - # "IMPALA", - # param_space=config, - # run_config=air.RunConfig(stop=self.stop, verbose=1), - # ).fit() - if __name__ == "__main__": import pytest diff --git a/rllib/models/tests/test_models.py b/rllib/models/tests/test_models.py index 86100e86b690..8ba77be666e1 100644 --- a/rllib/models/tests/test_models.py +++ b/rllib/models/tests/test_models.py @@ -62,6 +62,10 @@ def test_tf_modelv2(self): def test_modelv3(self): config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .framework("tf") .env_runners(num_env_runners=0) diff --git a/rllib/models/tests/test_preprocessors.py b/rllib/models/tests/test_preprocessors.py index 03a344de3289..f4451f15f11a 100644 --- a/rllib/models/tests/test_preprocessors.py +++ b/rllib/models/tests/test_preprocessors.py @@ -38,6 +38,10 @@ def tearDownClass(cls) -> None: def test_preprocessing_disabled_modelv2(self): config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment( "ray.rllib.examples.envs.classes.random_env.RandomEnv", env_config={ diff --git a/rllib/offline/tests/test_offline_env_runner.py b/rllib/offline/tests/test_offline_env_runner.py index 08c0ee5fa5c7..41dbb016b092 100644 --- a/rllib/offline/tests/test_offline_env_runner.py +++ b/rllib/offline/tests/test_offline_env_runner.py @@ -18,11 +18,6 @@ def setUp(self) -> None: self.base_path = pathlib.Path("/tmp/") self.config = ( PPOConfig() - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( # This defines how many rows per file we will # have (given `num_rows_per_file` in the diff --git a/rllib/offline/tests/test_offline_prelearner.py b/rllib/offline/tests/test_offline_prelearner.py index 503c35b0dfd1..919a9c12343b 100644 --- a/rllib/offline/tests/test_offline_prelearner.py +++ b/rllib/offline/tests/test_offline_prelearner.py @@ -238,10 +238,6 @@ def test_offline_prelearner_sample_from_episode_data(self): .env_runners( batch_mode="complete_episodes", ) - .api_stack( - enable_env_runner_and_connector_v2=True, - enable_rl_module_and_learner=True, - ) .offline_data( output=data_path, output_write_episodes=True, diff --git a/rllib/policy/tests/test_compute_log_likelihoods.py b/rllib/policy/tests/test_compute_log_likelihoods.py index 3aa8f19b776d..c1024dd80b17 100644 --- a/rllib/policy/tests/test_compute_log_likelihoods.py +++ b/rllib/policy/tests/test_compute_log_likelihoods.py @@ -129,20 +129,30 @@ def tearDownClass(cls) -> None: def test_ppo_cont(self): """Tests PPO's (cont. 
actions) compute_log_likelihoods method.""" - config = ppo.PPOConfig() - config.training( - model={ - "fcnet_hiddens": [10], - "fcnet_activation": "linear", - } + config = ( + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .training( + model={ + "fcnet_hiddens": [10], + "fcnet_activation": "linear", + } + ) + .debugging(seed=42) ) - config.debugging(seed=42) prev_a = np.array([0.0]) do_test_log_likelihood(ppo.PPO, config, prev_a, continuous=True) def test_ppo_discr(self): """Tests PPO's (discr. actions) compute_log_likelihoods method.""" config = ppo.PPOConfig() + config.api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) config.debugging(seed=42) prev_a = np.array(0) do_test_log_likelihood(ppo.PPO, config, prev_a) diff --git a/rllib/policy/tests/test_export_checkpoint_and_model.py b/rllib/policy/tests/test_export_checkpoint_and_model.py index 3515525ef1d3..2df1ff7defe5 100644 --- a/rllib/policy/tests/test_export_checkpoint_and_model.py +++ b/rllib/policy/tests/test_export_checkpoint_and_model.py @@ -21,6 +21,10 @@ def export_test( ): cls = get_trainable_cls(alg_name) config = cls.get_default_config() + config.api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) config.framework(framework) # Switch on saving native DL-framework (tf, torch) model files. config.checkpointing(export_native_model_files=True) diff --git a/rllib/policy/tests/test_policy_checkpoint_restore.py b/rllib/policy/tests/test_policy_checkpoint_restore.py index 87ff462e7787..cb7f15c9918b 100644 --- a/rllib/policy/tests/test_policy_checkpoint_restore.py +++ b/rllib/policy/tests/test_policy_checkpoint_restore.py @@ -16,6 +16,10 @@ def _do_checkpoint_twice_test(framework): # Checks if we can load a policy from a checkpoint (at least) twice config = ( PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) .env_runners(num_env_runners=0) .evaluation(evaluation_num_env_runners=0) ) @@ -94,10 +98,12 @@ def test_restore_checkpoint_with_nested_obs_space(self): space.original_space = gym.spaces.Discrete(2) space = space.original_space - # TODO(Artur): Construct a PPO policy here without the algorithm once we are - # able to do that with RLModules. policy = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment( observation_space=obs_space, action_space=gym.spaces.Discrete(2) ) diff --git a/rllib/policy/tests/test_policy_map.py b/rllib/policy/tests/test_policy_map.py index 94471a393c61..0a8911b895a5 100644 --- a/rllib/policy/tests/test_policy_map.py +++ b/rllib/policy/tests/test_policy_map.py @@ -23,7 +23,14 @@ def test_policy_map(self): # This is testing policy map which is something that will be deprecated in # favor of MultiAgentRLModules in the future. So we'll disable the RLModule API # for this test for now. 
- config = PPOConfig().framework("tf2") + config = ( + PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .framework("tf2") + ) obs_space = gym.spaces.Box(-1.0, 1.0, (4,), dtype=np.float32) dummy_obs = obs_space.sample() act_space = gym.spaces.Discrete(10000) diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py index 5db2d0a8262d..8f1675bab881 100644 --- a/rllib/tests/run_regression_tests.py +++ b/rllib/tests/run_regression_tests.py @@ -1,20 +1,6 @@ #!/usr/bin/env python -# Runs one or more regression tests. Retries tests up to 3 times. -# -# Example usage: -# $ python run_regression_tests.py regression-tests/cartpole-es-[tf|torch].yaml -# -# When using in BAZEL (with py_test), e.g. see in ray/rllib/BUILD: -# py_test( -# name = "run_regression_tests", -# main = "tests/run_regression_tests.py", -# tags = ["learning_tests"], -# size = "medium", # 5min timeout -# srcs = ["tests/run_regression_tests.py"], -# data = glob(["tuned_examples/regression_tests/*.yaml"]), -# # Pass `BAZEL` option and the path to look for yaml regression files. -# args = ["BAZEL", "tuned_examples/regression_tests"] -# ) + +# @OldAPIStack import argparse import os @@ -104,15 +90,6 @@ default=None, help="The WandB run name to use.", ) -# parser.add_argument( -# "--wandb-from-checkpoint", -# type=str, -# default=None, -# help=( -# "The WandB checkpoint location (e.g. `[team name]/[project name]/checkpoint_" -# "[run name]:v[version]`) from which to resume an experiment." -# ), -# ) parser.add_argument( "--checkpoint-freq", type=int, diff --git a/rllib/tests/test_dependency_torch.py b/rllib/tests/test_dependency_torch.py index 7048d0f92cf2..bcd720a6c7aa 100755 --- a/rllib/tests/test_dependency_torch.py +++ b/rllib/tests/test_dependency_torch.py @@ -21,6 +21,10 @@ # Note: No ray.init(), to test it works without Ray config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .framework("tf") .env_runners(num_env_runners=0) diff --git a/rllib/tests/test_gpus.py b/rllib/tests/test_gpus.py index 54ef39821f23..24511e14367a 100644 --- a/rllib/tests/test_gpus.py +++ b/rllib/tests/test_gpus.py @@ -18,7 +18,15 @@ def test_gpus_in_non_local_mode(self): actual_gpus = torch.cuda.device_count() print(f"Actual GPUs found (by torch): {actual_gpus}") - config = PPOConfig().env_runners(num_env_runners=2).environment("CartPole-v1") + config = ( + PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .env_runners(num_env_runners=2) + .environment("CartPole-v1") + ) # Expect errors when we run a config w/ num_gpus>0 w/o a GPU # and _fake_gpus=False. @@ -82,7 +90,15 @@ def test_gpus_in_local_mode(self): actual_gpus_available = torch.cuda.device_count() - config = PPOConfig().env_runners(num_env_runners=2).environment("CartPole-v1") + config = ( + PPOConfig() + .api_stack( + enable_rl_module_and_learner=False, + enable_env_runner_and_connector_v2=False, + ) + .env_runners(num_env_runners=2) + .environment("CartPole-v1") + ) # Expect no errors in local mode. 
for num_gpus in [0, 0.1, 1, actual_gpus_available + 4]: diff --git a/rllib/tests/test_io.py b/rllib/tests/test_io.py index 6e4b2298b5d7..be0c8aaafafb 100644 --- a/rllib/tests/test_io.py +++ b/rllib/tests/test_io.py @@ -57,6 +57,10 @@ def tearDown(self): def write_outputs(self, output, fw, output_config=None): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .framework(fw) .training(train_batch_size=250) @@ -98,6 +102,10 @@ def test_agent_output_infos(self): def test_agent_input_dir(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .evaluation(off_policy_estimation_methods={}) .training(train_batch_size=250) @@ -125,6 +133,10 @@ def test_split_by_episode(self): def test_agent_input_postprocessing_enabled(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .training(train_batch_size=250) .offline_data( @@ -169,6 +181,10 @@ def test_agent_input_postprocessing_enabled(self): def test_agent_input_eval_sampler(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .offline_data( postprocess_inputs=True, # adds back 'advantages' @@ -210,6 +226,10 @@ def input_creator(ioctx: IOContext) -> InputReader: config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .offline_data(input_=input_procedure) .evaluation(off_policy_estimation_methods={}) @@ -229,6 +249,10 @@ def test_multiple_output_workers(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("CartPole-v1") .env_runners(num_env_runners=2) .training(train_batch_size=500) diff --git a/rllib/tests/test_lstm.py b/rllib/tests/test_lstm.py index 969683d8ca38..9481205f9291 100644 --- a/rllib/tests/test_lstm.py +++ b/rllib/tests/test_lstm.py @@ -178,6 +178,10 @@ def test_minibatch_sequencing(self): register_env("counter", lambda _: DebugCounterEnv()) config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("counter") .framework("tf") .env_runners(num_env_runners=0, rollout_fragment_length=20) diff --git a/rllib/tests/test_nn_framework_import_errors.py b/rllib/tests/test_nn_framework_import_errors.py index 61c06816d09d..d117bf0f385d 100644 --- a/rllib/tests/test_nn_framework_import_errors.py +++ b/rllib/tests/test_nn_framework_import_errors.py @@ -9,7 +9,15 @@ def test_dont_import_torch_error(): """Check error being thrown, if torch not installed but configured.""" # Do not import tf for testing purposes. 
os.environ["RLLIB_TEST_NO_TORCH_IMPORT"] = "1" - config = ppo.PPOConfig().environment("CartPole-v1").framework("torch") + config = ( + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .environment("CartPole-v1") + .framework("torch") + ) with pytest.raises(ImportError, match="However, no installation was found"): config.build() diff --git a/rllib/tests/test_pettingzoo_env.py b/rllib/tests/test_pettingzoo_env.py index e77d42a89811..e42d18b77f5c 100644 --- a/rllib/tests/test_pettingzoo_env.py +++ b/rllib/tests/test_pettingzoo_env.py @@ -54,6 +54,10 @@ def env_creator(config): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("pistonball", env_config={"local_ratio": 0.5}) .multi_agent( # Set of policy IDs (by default, will use Algorithms's @@ -82,6 +86,10 @@ def test_pettingzoo_env(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .environment("simple_spread") .env_runners(num_env_runners=0, rollout_fragment_length=30) .debugging(log_level="DEBUG") diff --git a/rllib/tests/test_placement_groups.py b/rllib/tests/test_placement_groups.py index 2e056e09d1a9..2d268a9a4b10 100644 --- a/rllib/tests/test_placement_groups.py +++ b/rllib/tests/test_placement_groups.py @@ -35,6 +35,10 @@ def test_overriding_default_resource_request(self): # 3 Trials: Can only run 2 at a time (num_cpus=6; needed: 3). config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .training( model={"fcnet_hiddens": [10]}, lr=tune.grid_search([0.1, 0.01, 0.001]) ) @@ -71,6 +75,10 @@ def default_resource_request(cls, config): def test_default_resource_request(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .resources(placement_strategy="SPREAD") .env_runners( num_env_runners=2, @@ -98,6 +106,10 @@ def test_default_resource_request(self): def test_default_resource_request_plus_manual_leads_to_error(self): config = ( PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .training(model={"fcnet_hiddens": [10]}) .environment("CartPole-v1") .env_runners(num_env_runners=0) diff --git a/rllib/tests/test_ray_client.py b/rllib/tests/test_ray_client.py deleted file mode 100644 index cce060f8b69c..000000000000 --- a/rllib/tests/test_ray_client.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import unittest - -import ray -from ray import air, tune -from ray.air.constants import TRAINING_ITERATION -import ray.rllib.algorithms.ppo as ppo -from ray.rllib.examples.envs.classes.stateless_cartpole import StatelessCartPole -from ray.util.client.ray_client_helpers import ray_start_client_server - - -class TestRayClient(unittest.TestCase): - def test_connection(self): - with ray_start_client_server(): - assert ray.util.client.ray.is_connected() - assert ray.util.client.ray.is_connected() is False - - def test_custom_experiment(self): - with ray_start_client_server(): - assert ray.util.client.ray.is_connected() - - config = { - # Special flag signalling `my_experiment` how many iters to do. - "train-iterations": 2, - "lr": 0.01, - # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. 
- "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), - "num_env_runners": 0, - "framework": "tf", - } - resources = ppo.PPO.default_resource_request(config) - from ray.rllib.examples.ray_tune.custom_experiment import my_experiment - - tune.Tuner( - tune.with_resources(my_experiment, resources), - param_space=config, - ).fit() - - def test_cartpole_lstm(self): - with ray_start_client_server(): - assert ray.util.client.ray.is_connected() - - config = { - "env": StatelessCartPole, - } - - stop = {TRAINING_ITERATION: 3} - - tune.Tuner( - "PPO", - param_space=config, - run_config=air.RunConfig(stop=stop, verbose=2), - ).fit() - - -if __name__ == "__main__": - import pytest - import sys - - sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/tests/test_timesteps.py b/rllib/tests/test_timesteps.py index 6b95864d26aa..f0a081c57246 100644 --- a/rllib/tests/test_timesteps.py +++ b/rllib/tests/test_timesteps.py @@ -20,6 +20,10 @@ def test_timesteps(self): """Test whether PG can be built with both frameworks.""" config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) .experimental(_disable_preprocessor_api=True) .environment(RandomEnv) .env_runners(num_env_runners=0) diff --git a/rllib/train.py b/rllib/train.py index 5f9439b0138a..32f59ec39463 100755 --- a/rllib/train.py +++ b/rllib/train.py @@ -91,6 +91,10 @@ def load_experiments_from_file( experiments = yaml.safe_load(f) if stop is not None and stop != "{}": raise ValueError("`stop` criteria only supported for python files.") + # Make sure yaml experiments are always old API stack. + for experiment in experiments.values(): + experiment["config"]["enable_rl_module_and_learner"] = False + experiment["config"]["enable_env_runner_and_connector_v2"] = False # Python file case (ensured by file type enum) else: module_name = os.path.basename(config_file).replace(".py", "") diff --git a/rllib/tuned_examples/bc/cartpole_recording.py b/rllib/tuned_examples/bc/cartpole_recording.py index 4ce66aef5c47..e34b76a2c953 100644 --- a/rllib/tuned_examples/bc/cartpole_recording.py +++ b/rllib/tuned_examples/bc/cartpole_recording.py @@ -15,11 +15,6 @@ config = ( PPOConfig() - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( rollout_fragment_length=1000, num_env_runners=0, batch_mode="truncate_episodes" ) diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py b/rllib/tuned_examples/dqn/benchmark_dqn_atari.py index b79817b9e976..4cbe1fa6e204 100644 --- a/rllib/tuned_examples/dqn/benchmark_dqn_atari.py +++ b/rllib/tuned_examples/dqn/benchmark_dqn_atari.py @@ -302,11 +302,6 @@ def stop_all(self): }, clip_rewards=True, ) - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( # Every 4 agent steps a training update is performed. rollout_fragment_length=4, diff --git a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py b/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py index e76bf40cdcdd..2e8deb84d354 100644 --- a/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py +++ b/rllib/tuned_examples/dqn/benchmark_dqn_atari_rllib_preprocessing.py @@ -295,11 +295,6 @@ def stop_all(self): }, clip_rewards=True, ) - # Enable new API stack and use EnvRunner. 
- .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( # Every 4 agent steps a training update is performed. rollout_fragment_length=4, diff --git a/rllib/tuned_examples/dqn/cartpole_dqn.py b/rllib/tuned_examples/dqn/cartpole_dqn.py index 6b417a9c9782..821ff7c8d5bb 100644 --- a/rllib/tuned_examples/dqn/cartpole_dqn.py +++ b/rllib/tuned_examples/dqn/cartpole_dqn.py @@ -13,10 +13,6 @@ config = ( DQNConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment(env="CartPole-v1") .training( lr=0.0005 * (args.num_gpus or 1) ** 0.5, diff --git a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py b/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py index 280822465c5f..726ec923cf99 100644 --- a/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py +++ b/rllib/tuned_examples/dqn/multi_agent_cartpole_dqn.py @@ -25,10 +25,6 @@ config = ( DQNConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment(env="multi_agent_cartpole", env_config={"num_agents": args.num_agents}) .training( lr=0.00065 * (args.num_gpus or 1) ** 0.5, diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py index ba8be549c89c..e29a99ebc155 100644 --- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py +++ b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco.py @@ -85,11 +85,6 @@ def stop_all(self): config = ( PPOConfig() .environment(env=tune.grid_search(list(benchmark_envs.keys()))) - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( # Following the paper. num_env_runners=32, diff --git a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py index 8116a2431cd5..c3a4c37c1437 100644 --- a/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py +++ b/rllib/tuned_examples/ppo/benchmark_ppo_mujoco_pb2.py @@ -69,11 +69,6 @@ config = ( PPOConfig() .environment(env=env) - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( rollout_fragment_length=1, num_env_runners=num_rollout_workers, diff --git a/rllib/tuned_examples/ppo/cartpole-ppo.yaml b/rllib/tuned_examples/ppo/cartpole-ppo.yaml deleted file mode 100644 index 94a093eec3b3..000000000000 --- a/rllib/tuned_examples/ppo/cartpole-ppo.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# @OldAPIStack -cartpole-ppo: - env: CartPole-v1 - run: PPO - stop: - env_runners/episode_return_mean: 150 - timesteps_total: 100000 - config: - # Works for both torch and tf2. - framework: torch - gamma: 0.99 - lr: 0.0003 - num_env_runners: 1 - num_epochs: 6 - vf_loss_coeff: 0.01 - model: - fcnet_hiddens: [32] - fcnet_activation: linear - vf_share_layers: true diff --git a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py b/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py index 8d9405d7560f..523eaf0996f4 100644 --- a/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py +++ b/rllib/tuned_examples/ppo/cartpole_truncated_ppo.py @@ -25,11 +25,6 @@ config = ( PPOConfig() - # Enable new API stack and use EnvRunner. 
- .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("cartpole_truncated") .env_runners(num_envs_per_env_runner=10) .training( @@ -39,7 +34,10 @@ ) # For evaluation, use the "real" CartPole-v1 env (up to 500 steps). .evaluation( - evaluation_config=PPOConfig.overrides(env="CartPole-v1"), + evaluation_config=PPOConfig.overrides( + env="CartPole-v1", + explore=False, + ), evaluation_interval=1, evaluation_num_env_runners=1, ) @@ -47,7 +45,7 @@ stop = { f"{NUM_ENV_STEPS_SAMPLED_LIFETIME}": 500000, - f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 200.0, + f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": 80.0, } diff --git a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py b/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py index bd3794daf41d..deb56f84ca02 100644 --- a/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py +++ b/rllib/tuned_examples/ppo/memory_leak_test_ppo_new_stack.py @@ -4,10 +4,6 @@ config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) # Switch off np.random, which is known to have memory leaks. .environment(RandomLargeObsSpaceEnv, env_config={"static_samples": True}) .env_runners( diff --git a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py b/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py index 7e4f74ea50a8..8130cdda1af9 100644 --- a/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py +++ b/rllib/tuned_examples/ppo/multi_agent_cartpole_ppo.py @@ -22,10 +22,6 @@ config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("multi_agent_cartpole", env_config={"num_agents": args.num_agents}) .rl_module( model_config=DefaultModelConfig( diff --git a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py b/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py index 9ad40c4c2b47..92e364b4343c 100644 --- a/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py +++ b/rllib/tuned_examples/ppo/multi_agent_pendulum_ppo.py @@ -23,10 +23,6 @@ config = ( PPOConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("multi_agent_pendulum", env_config={"num_agents": args.num_agents}) .env_runners( env_to_module_connector=lambda env: MeanStdFilter(multi_agent=True), diff --git a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py b/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py index d700cb7ab0c8..3e63f299793e 100644 --- a/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py +++ b/rllib/tuned_examples/ppo/multi_agent_stateless_cartpole_ppo.py @@ -27,11 +27,6 @@ config = ( PPOConfig() - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("multi_stateless_cart") .env_runners( env_to_module_connector=lambda env: MeanStdFilter(multi_agent=True), diff --git a/rllib/tuned_examples/ppo/pendulum-ppo.yaml b/rllib/tuned_examples/ppo/pendulum-ppo.yaml deleted file mode 100644 index 7ab57c621a97..000000000000 --- a/rllib/tuned_examples/ppo/pendulum-ppo.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# @OldAPIStack -# Can expect improvement to -140 reward in ~300-500k timesteps. -pendulum-ppo: - env: Pendulum-v1 - run: PPO - stop: - env_runners/episode_return_mean: -400 - timesteps_total: 400000 - config: - # Works for both torch and tf. 
- framework: torch - train_batch_size: 512 - vf_clip_param: 10.0 - num_env_runners: 0 - num_envs_per_env_runner: 20 - lambda: 0.1 - gamma: 0.95 - lr: 0.0003 - minibatch_size: 64 - observation_filter: MeanStdFilter - model: - fcnet_activation: relu diff --git a/rllib/tuned_examples/ppo/repeatafterme-ppo-lstm.yaml b/rllib/tuned_examples/ppo/repeatafterme-ppo-lstm.yaml deleted file mode 100644 index 490b63245f15..000000000000 --- a/rllib/tuned_examples/ppo/repeatafterme-ppo-lstm.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# @OldAPIStack -repeat-after-me-ppo-w-lstm: - # Default case: Discrete(2) observations/actions. - env: ray.rllib.examples.envs.classes.repeat_after_me_env.RepeatAfterMeEnv - run: PPO - stop: - env_runners/episode_return_mean: 50 - timesteps_total: 100000 - config: - # Works for both torch and tf. - framework: torch - # Make env partially observable. - env_config: - config: - repeat_delay: 2 - gamma: 0.9 - lr: 0.0003 - num_env_runners: 0 - num_envs_per_env_runner: 20 - num_epochs: 5 - entropy_coeff: 0.00001 - model: - use_lstm: true - lstm_cell_size: 64 - max_seq_len: 20 - fcnet_hiddens: [64] - vf_share_layers: true diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco.py index d461730641a0..2ec59d9f3af8 100644 --- a/rllib/tuned_examples/sac/benchmark_sac_mujoco.py +++ b/rllib/tuned_examples/sac/benchmark_sac_mujoco.py @@ -76,11 +76,6 @@ def stop_all(self): config = ( SACConfig() .environment(env=tune.grid_search(list(benchmark_envs.keys()))) - # Enable new API stack and use EnvRunner. - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( rollout_fragment_length=1, num_env_runners=0, diff --git a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py index 66d4a1f46d5f..8ac4faf0b188 100644 --- a/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py +++ b/rllib/tuned_examples/sac/benchmark_sac_mujoco_pb2.py @@ -63,11 +63,6 @@ config = ( SACConfig() .environment(env=env) - # Enable new API stack and use EnvRunner. 
- .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .env_runners( rollout_fragment_length="auto", num_env_runners=1, diff --git a/rllib/tuned_examples/sac/halfcheetah_sac.py b/rllib/tuned_examples/sac/halfcheetah_sac.py index dd9d28c715c0..d763631035b8 100644 --- a/rllib/tuned_examples/sac/halfcheetah_sac.py +++ b/rllib/tuned_examples/sac/halfcheetah_sac.py @@ -16,10 +16,6 @@ config = ( SACConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("HalfCheetah-v4") .training( initial_alpha=1.001, diff --git a/rllib/tuned_examples/sac/humanoid_sac.py b/rllib/tuned_examples/sac/humanoid_sac.py index 8ecba7d4cfa0..8311ee6dc134 100644 --- a/rllib/tuned_examples/sac/humanoid_sac.py +++ b/rllib/tuned_examples/sac/humanoid_sac.py @@ -25,10 +25,6 @@ config = ( SACConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("Humanoid-v4") .training( initial_alpha=1.001, diff --git a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py b/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py index 481c61e3824b..2d2729b6c10c 100644 --- a/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py +++ b/rllib/tuned_examples/sac/multi_agent_pendulum_sac.py @@ -27,10 +27,6 @@ config = ( SACConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("multi_agent_pendulum", env_config={"num_agents": args.num_agents}) .training( initial_alpha=1.001, diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/tuned_examples/sac/pendulum_sac.py index 16635e32c96a..466b7fc09413 100644 --- a/rllib/tuned_examples/sac/pendulum_sac.py +++ b/rllib/tuned_examples/sac/pendulum_sac.py @@ -15,10 +15,6 @@ config = ( SACConfig() - .api_stack( - enable_rl_module_and_learner=True, - enable_env_runner_and_connector_v2=True, - ) .environment("Pendulum-v1") .training( initial_alpha=1.001, diff --git a/rllib/utils/exploration/tests/test_curiosity.py b/rllib/utils/exploration/tests/test_curiosity.py index bcc603171264..adbab1cf4573 100644 --- a/rllib/utils/exploration/tests/test_curiosity.py +++ b/rllib/utils/exploration/tests/test_curiosity.py @@ -50,6 +50,10 @@ def test_curiosity_on_frozen_lake(self): config = ( ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) # A very large frozen-lake that's hard for a random policy to solve # due to 0.0 feedback. 
.environment( @@ -88,7 +92,8 @@ def test_curiosity_on_frozen_lake(self): "type": "StochasticSampling", }, }, - ).training(lr=0.001) + ) + .training(lr=0.001) ) num_iterations = 10 diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py index e7e29002e8f0..c8a1c14c1932 100644 --- a/rllib/utils/exploration/tests/test_explorations.py +++ b/rllib/utils/exploration/tests/test_explorations.py @@ -87,7 +87,13 @@ def test_impala(self): def test_ppo_discr(self): config = ( - ppo.PPOConfig().environment("CartPole-v1").env_runners(num_env_runners=0) + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .environment("CartPole-v1") + .env_runners(num_env_runners=0) ) do_test_explorations( config, @@ -97,7 +103,13 @@ def test_ppo_discr(self): def test_ppo_cont(self): config = ( - ppo.PPOConfig().environment("Pendulum-v1").env_runners(num_env_runners=0) + ppo.PPOConfig() + .api_stack( + enable_env_runner_and_connector_v2=False, + enable_rl_module_and_learner=False, + ) + .environment("Pendulum-v1") + .env_runners(num_env_runners=0) ) do_test_explorations( config, diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index f9dd0e2edb1a..95724d911bfe 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -1,6 +1,4 @@ import argparse -from collections import Counter -import copy import json import logging import os @@ -18,7 +16,6 @@ Type, Union, ) -import yaml import gymnasium as gym from gymnasium.spaces import Box, Discrete, MultiDiscrete, MultiBinary @@ -31,12 +28,9 @@ from ray import air, tune from ray.air.constants import TRAINING_ITERATION from ray.air.integrations.wandb import WandbLoggerCallback, WANDB_ENV_VAR -from ray.rllib.common import SupportedFileType from ray.rllib.core import DEFAULT_MODULE_ID, Columns from ray.rllib.env.wrappers.atari_wrappers import is_atari, wrap_deepmind -from ray.rllib.train import load_experiments_from_file from ray.rllib.utils.annotations import OldAPIStack -from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.framework import try_import_jax, try_import_tf, try_import_torch from ray.rllib.utils.metrics import ( DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICY, @@ -44,15 +38,13 @@ EPISODE_RETURN_MEAN, EVALUATION_RESULTS, NUM_ENV_STEPS_TRAINED, - NUM_ENV_STEPS_TRAINED_LIFETIME, NUM_ENV_STEPS_SAMPLED_LIFETIME, - NUM_EPISODES_LIFETIME, ) from ray.rllib.utils.typing import ResultDict from ray.rllib.utils.error import UnsupportedSpaceException -from ray.tune import CLIReporter, run_experiments +from ray.tune import CLIReporter if TYPE_CHECKING: @@ -958,326 +950,6 @@ def check_train_results(train_results: ResultDict): return train_results -@Deprecated(new="run_learning_tests_from_yaml_or_py(config_files=...)", error=False) -def run_learning_tests_from_yaml( - yaml_files: List[str], - *, - framework: Optional[str] = None, - max_num_repeats: int = 2, - use_pass_criteria_as_stop: bool = True, - smoke_test: bool = False, -): - return run_learning_tests_from_yaml_or_py( - yaml_files, - framework=framework, - max_num_repeats=max_num_repeats, - use_pass_criteria_as_stop=use_pass_criteria_as_stop, - smoke_test=smoke_test, - ) - - -def run_learning_tests_from_yaml_or_py( - config_files: List[str], - *, - framework: Optional[str] = None, - max_num_repeats: int = 2, - use_pass_criteria_as_stop: bool = True, - smoke_test: bool = False, -) -> Dict[str, Any]: - """Runs the given experiments in config_files and returns 
results dict. - - Args: - framework: The framework to use for running this test. If None, - run the test on all frameworks. - config_files: List of yaml or py config file names. - max_num_repeats: How many times should we repeat a failed - experiment? - use_pass_criteria_as_stop: Configure the Trial so that it stops - as soon as pass criterias are met. - smoke_test: Whether this is just a smoke-test. If True, - set time_total_s to 5min and don't early out due to rewards - or timesteps reached. - - Returns: - A results dict mapping strings (e.g. "time_taken", "stats", "passed") to - the respective stats/values. - """ - print("Will run the following config files:") - for config_file in config_files: - print("->", config_file) - - # All trials we'll ever run in this test script. - all_trials = [] - # The experiments (by name) we'll run up to `max_num_repeats` times. - experiments = {} - # The results per experiment. - checks = {} - # Metrics per experiment. - stats = {} - - start_time = time.monotonic() - - def should_check_eval(experiment): - # If we have evaluation workers, use their rewards. - # This is useful for offline learning tests, where - # we evaluate against an actual environment. - return bool(experiment["config"].get("evaluation_interval")) - - # Loop through all collected files and gather experiments. - # Set correct framework(s). - for config_file in config_files: - # For python files, need to make sure, we only deliver the module name into the - # `load_experiments_from_file` function (everything from "/ray/rllib" on). - if config_file.endswith(".py"): - if config_file.endswith( - "__init__.py" - ): # weird CI learning test (BAZEL) case - continue - tf_experiments = load_experiments_from_file( - config_file, SupportedFileType.python - ) - else: - tf_experiments = load_experiments_from_file( - config_file, SupportedFileType.yaml - ) - - # Add torch version of all experiments to the list. - for k, e in tf_experiments.items(): - # If framework given as arg, use that framework. - if framework is not None: - frameworks = [framework] - # If framework given in config, only test for that framework. - # Some algos do not have both versions available. - elif "frameworks" in e: - frameworks = e["frameworks"] - else: - # By default we don't run tf2, because tf2's multi-gpu support - # isn't complete yet. - frameworks = ["tf", "torch"] - # Pop frameworks key to not confuse Tune. - e.pop("frameworks", None) - - e["stop"] = e["stop"] if "stop" in e else {} - e["pass_criteria"] = e["pass_criteria"] if "pass_criteria" in e else {} - - check_eval = should_check_eval(e) - episode_reward_key = ( - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}" - if not check_eval - else f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}" - ) - - # For smoke-tests, we just run for n min. - if smoke_test: - # 0sec for each(!) experiment/trial. - # This is such that if there are many experiments/trials - # in a test (e.g. rllib_learning_test), each one can at least - # create its Algorithm and run a first iteration. - e["stop"]["time_total_s"] = 0 - else: - if use_pass_criteria_as_stop: - # We also stop early, once we reach the desired reward. - min_reward = e.get("pass_criteria", {}).get(episode_reward_key) - if min_reward is not None: - e["stop"][episode_reward_key] = min_reward - - # Generate `checks` dict for all experiments - # (tf, tf2 and/or torch). 
- for framework in frameworks: - k_ = k + "-" + framework - ec = copy.deepcopy(e) - ec["config"]["framework"] = framework - if framework == "tf2": - ec["config"]["eager_tracing"] = True - - checks[k_] = { - "min_reward": ec["pass_criteria"].get(episode_reward_key, 0.0), - "min_throughput": ec["pass_criteria"].get("timesteps_total", 0.0) - / (ec["stop"].get("time_total_s", 1.0) or 1.0), - "time_total_s": ec["stop"].get("time_total_s"), - "failures": 0, - "passed": False, - } - # This key would break tune. - ec.pop("pass_criteria", None) - - # One experiment to run. - experiments[k_] = ec - - # Keep track of those experiments we still have to run. - # If an experiment passes, we'll remove it from this dict. - experiments_to_run = experiments.copy() - - # When running as a release test, use `/mnt/cluster_storage` as the storage path. - release_test_storage_path = "/mnt/cluster_storage" - if os.path.exists(release_test_storage_path): - for k, e in experiments_to_run.items(): - e["storage_path"] = release_test_storage_path - - try: - ray.init(address="auto") - except ConnectionError: - ray.init() - - for i in range(max_num_repeats): - # We are done. - if len(experiments_to_run) == 0: - print("All experiments finished.") - break - - print(f"Starting learning test iteration {i}...") - - # Print out the actual config. - print("== Test config ==") - print(yaml.dump(experiments_to_run)) - - # Run remaining experiments. - trials = run_experiments( - experiments_to_run, - resume=False, - verbose=2, - progress_reporter=CLIReporter( - metric_columns={ - TRAINING_ITERATION: "iter", - "time_total_s": "time_total_s", - NUM_ENV_STEPS_SAMPLED_LIFETIME: "ts (sampled)", - NUM_ENV_STEPS_TRAINED_LIFETIME: "ts (trained)", - NUM_EPISODES_LIFETIME: "train_episodes", - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": "reward_mean", - ( - f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/" - f"{EPISODE_RETURN_MEAN}" - ): "eval_reward_mean", - }, - parameter_columns=["framework"], - sort_by_metric=True, - max_report_frequency=30, - ), - ) - - all_trials.extend(trials) - - # Check each experiment for whether it passed. - # Criteria is to a) reach reward AND b) to have reached the throughput - # defined by `NUM_ENV_STEPS_(SAMPLED|TRAINED)` / `time_total_s`. - for experiment in experiments_to_run.copy(): - print(f"Analyzing experiment {experiment} ...") - # Collect all trials within this experiment (some experiments may - # have num_samples or grid_searches defined). - trials_for_experiment = [] - for t in trials: - trial_exp = re.sub(".+/([^/]+)$", "\\1", t.local_dir) - if trial_exp == experiment: - trials_for_experiment.append(t) - print(f" ... Trials: {trials_for_experiment}.") - - check_eval = should_check_eval(experiments[experiment]) - - # Error: Increase failure count and repeat. - if any(t.status == "ERROR" for t in trials_for_experiment): - print(" ... ERROR.") - checks[experiment]["failures"] += 1 - # Smoke-tests always succeed. - elif smoke_test: - print(" ... SMOKE TEST (mark ok).") - checks[experiment]["passed"] = True - del experiments_to_run[experiment] - # Experiment finished: Check reward achieved and timesteps done - # (throughput). - else: - # Use best_result's reward to check min_reward. 
- if check_eval: - episode_return_mean = np.mean( - [ - t.metric_analysis[ - f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/" - f"{EPISODE_RETURN_MEAN}" - ]["max"] - for t in trials_for_experiment - ] - ) - else: - episode_return_mean = np.mean( - [ - t.metric_analysis[ - f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}" - ]["max"] - for t in trials_for_experiment - ] - ) - desired_reward = checks[experiment]["min_reward"] - - # Use last_result["timesteps_total"] to check throughput. - timesteps_total = np.mean( - [t.last_result["timesteps_total"] for t in trials_for_experiment] - ) - total_time_s = np.mean( - [t.last_result["time_total_s"] for t in trials_for_experiment] - ) - - # TODO(jungong) : track training- and env throughput separately. - throughput = timesteps_total / (total_time_s or 1.0) - # Throughput verification is not working. Many algorithm, e.g. TD3, - # achieves the learning goal, but fails the throughput check - # miserably. - # TODO(jungong): Figure out why. - # - # desired_throughput = checks[experiment]["min_throughput"] - desired_throughput = None - - # Record performance. - stats[experiment] = { - "episode_reward_mean": float(episode_return_mean), - "throughput": ( - float(throughput) if throughput is not None else 0.0 - ), - } - - print( - f" ... Desired reward={desired_reward}; " - f"desired throughput={desired_throughput}" - ) - - # We failed to reach desired reward or the desired throughput. - if (desired_reward and episode_return_mean < desired_reward) or ( - desired_throughput and throughput < desired_throughput - ): - print( - " ... Not successful: Actual " - f"return={episode_return_mean}; " - f"actual throughput={throughput}" - ) - checks[experiment]["failures"] += 1 - # We succeeded! - else: - print( - " ... Successful: (mark ok). Actual " - f"return={episode_return_mean}; " - f"actual throughput={throughput}" - ) - checks[experiment]["passed"] = True - del experiments_to_run[experiment] - - ray.shutdown() - - time_taken = time.monotonic() - start_time - - # Create results dict and write it to disk. - result = { - "time_taken": float(time_taken), - "trial_states": dict(Counter([trial.status for trial in all_trials])), - "last_update": float(time.time()), - "stats": stats, - "passed": [k for k, exp in checks.items() if exp["passed"]], - "not_passed": [k for k, exp in checks.items() if not exp["passed"]], - "failures": { - k: exp["failures"] for k, exp in checks.items() if exp["failures"] > 0 - }, - } - - return result - - # TODO (sven): Make this the de-facto, well documented, and unified utility for most of # our tests: # - CI (label: "learning_tests")
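The hunks above follow one convention: tuned examples drop their explicit `.api_stack(enable_rl_module_and_learner=True, enable_env_runner_and_connector_v2=True)` calls (the stack those calls enabled appears to be the default now), while tests and YAML-defined experiments that still depend on old-stack features switch back to the old API stack explicitly. Below is a minimal illustrative sketch of the two forms, restricted to the calls and config keys that already appear in this patch; the environment and runner settings are placeholders, not part of any specific test.

# Sketch only: summarizes the configuration pattern applied throughout this patch.
from ray.rllib.algorithms.ppo import PPOConfig

# 1) Builder form, as added to the updated test cases: explicitly opt back
#    into the old API stack before building the Algorithm.
config = (
    PPOConfig()
    .api_stack(
        enable_rl_module_and_learner=False,
        enable_env_runner_and_connector_v2=False,
    )
    .environment("CartPole-v1")
    .env_runners(num_env_runners=0)
)
algo = config.build()

# 2) Dict form, as forced onto every YAML experiment in
#    `load_experiments_from_file()` in rllib/train.py.
experiment = {"config": {"env": "CartPole-v1"}}
experiment["config"]["enable_rl_module_and_learner"] = False
experiment["config"]["enable_env_runner_and_connector_v2"] = False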