diff --git a/rllib/BUILD b/rllib/BUILD index 16af480689f2b..5cd99351b97c0 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2086,6 +2086,27 @@ py_test( # tagged by @OldAPIStack and/or @HybridAPIStack # ---------------------- +# subdirectory: actions/ + +# Nested action spaces (flattening obs and learning w/ multi-action distribution). +py_test( + name = "examples/actions/nested_action_spaces_ppo", + main = "examples/actions/nested_action_spaces.py", + tags = ["team:rllib", "exclusive", "examples"], + size = "large", + srcs = ["examples/actions/nested_action_spaces.py"], + args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"] +) + +py_test( + name = "examples/actions/nested_action_spaces_multi_agent_ppo", + main = "examples/actions/nested_action_spaces.py", + tags = ["team:rllib", "exclusive", "examples"], + size = "large", + srcs = ["examples/actions/nested_action_spaces.py"], + args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"] +) + # subdirectory: algorithms/ #@OldAPIStack @@ -2213,41 +2234,22 @@ py_test( args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"] ) -# Nested action spaces (flattening obs and learning w/ multi-action distribution). -py_test( - name = "examples/connectors/nested_action_spaces_ppo", - main = "examples/connectors/nested_action_spaces.py", - tags = ["team:rllib", "exclusive", "examples"], - size = "large", - srcs = ["examples/connectors/nested_action_spaces.py"], - args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"] -) - -py_test( - name = "examples/connectors/nested_action_spaces_multi_agent_ppo", - main = "examples/connectors/nested_action_spaces.py", - tags = ["team:rllib", "exclusive", "examples"], - size = "large", - srcs = ["examples/connectors/nested_action_spaces.py"], - args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"] -) - # Nested observation spaces (flattening). 
py_test( - name = "examples/connectors/nested_observation_spaces_ppo", - main = "examples/connectors/nested_observation_spaces.py", + name = "examples/connectors/flatten_observations_dict_space_ppo", + main = "examples/connectors/flatten_observations_dict_space.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/nested_observation_spaces.py"], + srcs = ["examples/connectors/flatten_observations_dict_space.py"], args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO"] ) py_test( - name = "examples/connectors/nested_observation_spaces_multi_agent_ppo", - main = "examples/connectors/nested_observation_spaces.py", + name = "examples/connectors/flatten_observations_dict_space_multi_agent_ppo", + main = "examples/connectors/flatten_observations_dict_space.py", tags = ["team:rllib", "exclusive", "examples"], size = "medium", - srcs = ["examples/connectors/nested_observation_spaces.py"], + srcs = ["examples/connectors/flatten_observations_dict_space.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--framework=torch", "--algo=PPO"] ) diff --git a/rllib/connectors/connector_pipeline_v2.py b/rllib/connectors/connector_pipeline_v2.py index f04376dcd9937..3156c66a7a69d 100644 --- a/rllib/connectors/connector_pipeline_v2.py +++ b/rllib/connectors/connector_pipeline_v2.py @@ -312,7 +312,7 @@ def _fix_spaces(self): obs_space = self.input_observation_space act_space = self.input_action_space for con in self.connectors: - con.input_observation_space = obs_space con.input_action_space = act_space + con.input_observation_space = obs_space obs_space = con.observation_space act_space = con.action_space diff --git a/rllib/connectors/connector_v2.py b/rllib/connectors/connector_v2.py index ad7bd9eed4bb1..e43f7515faeab 100644 --- a/rllib/connectors/connector_v2.py +++ b/rllib/connectors/connector_v2.py @@ -84,8 +84,9 @@ def __init__( self._action_space = None self._input_observation_space = None self._input_action_space = None - self.input_observation_space = input_observation_space + self.input_action_space = input_action_space + self.input_observation_space = input_observation_space @OverrideToImplementCustomLogic def recompute_observation_space_from_input_spaces(self) -> gym.Space: diff --git a/rllib/connectors/env_to_module/__init__.py b/rllib/connectors/env_to_module/__init__.py index 8f2750c9a8075..98b73bd9962bb 100644 --- a/rllib/connectors/env_to_module/__init__.py +++ b/rllib/connectors/env_to_module/__init__.py @@ -14,7 +14,7 @@ FlattenObservations, ) from ray.rllib.connectors.env_to_module.prev_actions_prev_rewards import ( - PrevActionsPrevRewardsConnector, + PrevActionsPrevRewards, ) from ray.rllib.connectors.env_to_module.write_observations_to_episodes import ( WriteObservationsToEpisodes, @@ -29,6 +29,6 @@ "EnvToModulePipeline", "FlattenObservations", "NumpyToTensor", - "PrevActionsPrevRewardsConnector", + "PrevActionsPrevRewards", "WriteObservationsToEpisodes", ] diff --git a/rllib/connectors/env_to_module/flatten_observations.py b/rllib/connectors/env_to_module/flatten_observations.py index 1958f9e871d17..6a2e60173b65c 100644 --- a/rllib/connectors/env_to_module/flatten_observations.py +++ b/rllib/connectors/env_to_module/flatten_observations.py @@ -6,7 +6,6 @@ import tree # pip install dm_tree from ray.rllib.connectors.connector_v2 import ConnectorV2 -from ray.rllib.core.columns import Columns from ray.rllib.core.rl_module.rl_module import RLModule from 
ray.rllib.utils.annotations import override from ray.rllib.utils.numpy import flatten_inputs_to_1d_tensor @@ -19,18 +18,12 @@ class FlattenObservations(ConnectorV2): """A connector piece that flattens all observation components into a 1D array. - - Only works on data that has already been added to the batch. - - This connector makes the assumption that under the Columns.OBS key in batch, - there is either a list of individual env observations to be flattened (single-agent - case) or a dict mapping agent- and module IDs to lists of data items to be - flattened (multi-agent case). - - Does NOT work in a Learner pipeline as it operates on individual observation - items (as opposed to batched/time-ranked data). - - Therefore, assumes that the altered (flattened) observations will be written - back into the episode by a later connector piece in the env-to-module pipeline - (which this piece is part of as well). - - Does NOT read any information from the given list of Episode objects. - - Does NOT write any observations (or other data) to the given Episode objects. + - Works directly on the incoming episodes list and changes the last observation + in-place (write the flattened observation back into the episode). + - This connector does NOT alter the incoming batch (`data`) when called. + - This connector does NOT work in a `LearnerConnectorPipeline` because it requires + the incoming episodes to still be ongoing (in progress) as it only alters the + latest observation, not all observations in an episode. .. testcode:: @@ -38,6 +31,7 @@ class FlattenObservations(ConnectorV2): import numpy as np from ray.rllib.connectors.env_to_module import FlattenObservations + from ray.rllib.env.single_agent_episode import SingleAgentEpisode from ray.rllib.utils.test_utils import check # Some arbitrarily nested, complex observation space. @@ -51,24 +45,26 @@ class FlattenObservations(ConnectorV2): }) act_space = gym.spaces.Discrete(2) - # A batch of two example items, both coming from the above defined observation - # space. - batch = { - "obs": [ - # 1st example item. + # Two example episodes, both with initial (reset) observations coming from the + # above defined observation space. + episode_1 = SingleAgentEpisode( + observations=[ { "a": np.array(-10.0, np.float32), "b": (1, np.array([[-1.0], [-1.0]], np.float32)), "c": np.array([0, 2]), }, - # 2nd example item. + ], + ) + episode_2 = SingleAgentEpisode( + observations=[ { "a": np.array(10.0, np.float32), "b": (0, np.array([[1.0], [1.0]], np.float32)), "c": np.array([1, 1]), }, ], - } + ) # Construct our connector piece. connector = FlattenObservations(obs_space, act_space) @@ -76,23 +72,23 @@ class FlattenObservations(ConnectorV2): # Call our connector piece with the example data. output_data = connector( rl_module=None, # This connector works without an RLModule. - data=batch, - episodes=[], # This connector does not need the `episodes` input. + data={}, # This connector does not alter any data. + episodes=[episode_1, episode_2], explore=True, shared_data={}, ) - # The connector does not change the number of items in the data (still 2 items). - check(len(output_data["obs"]), 2) + # The connector does not alter the data and acts as pure pass-through. + check(output_data, {}) - # The connector has flattened each item in the data to a 1D tensor. + # The connector has flattened each item in the episodes to a 1D tensor. check( - output_data["obs"][0], + episode_1.get_observations(0), # box() disc(2). box(2, 1). multidisc(2, 3)........ 
np.array([-10.0, 0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]), ) check( - output_data["obs"][1], + episode_2.get_observations(0), # box() disc(2). box(2, 1). multidisc(2, 3)........ np.array([10.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]), ) @@ -169,40 +165,42 @@ def __call__( shared_data: Optional[dict] = None, **kwargs, ) -> Any: - observations = data.get(Columns.OBS) - - if observations is None: - raise ValueError( - f"`batch` must already have a column named {Columns.OBS} in it " - f"for this connector to work!" - ) - - # Process each item under the Columns.OBS key individually and flatten - # it. We are using the `ConnectorV2.foreach_batch_item_change_in_place` API, - # allowing us to not worry about multi- or single-agent setups and returning - # the new version of each item we are iterating over. - self.foreach_batch_item_change_in_place( - batch=data, - column=Columns.OBS, - func=( - lambda item, eps_id, agent_id, module_id: ( - # Multi-agent AND skip this AgentID. - item - if self._agent_ids and agent_id not in self._agent_ids - # Single-agent or flatten this AgentIDs observation. - else flatten_inputs_to_1d_tensor( - item, + for sa_episode in self.single_agent_episode_iterator( + episodes, agents_that_stepped_only=True + ): + # Episode is not finalized yet and thus still operates on lists of items. + assert not sa_episode.is_finalized + + last_obs = sa_episode.get_observations(-1) + + if self._multi_agent: + if ( + self._agent_ids is not None + and sa_episode.agent_id not in self._agent_ids + ): + flattened_obs = last_obs + else: + flattened_obs = flatten_inputs_to_1d_tensor( + inputs=last_obs, # In the multi-agent case, we need to use the specific agent's # space struct, not the multi-agent observation space dict. - ( - self._input_obs_base_struct - if not agent_id - else self._input_obs_base_struct[agent_id] - ), - # Our items are bare observations (no batch axis present). + spaces_struct=self._input_obs_base_struct[sa_episode.agent_id], + # Our items are individual observations (no batch axis present). batch_axis=False, ) + else: + flattened_obs = flatten_inputs_to_1d_tensor( + inputs=last_obs, + spaces_struct=self._input_obs_base_struct, + # Our items are individual observations (no batch axis present). + batch_axis=False, ) - ), - ) + + # Write new observation directly back into the episode. + sa_episode.set_observations(at_indices=-1, new_data=flattened_obs) + # We set the Episode's observation space to ours so that we can safely + # set the last obs to the new value (without causing a space mismatch + # error). + sa_episode.observation_space = self.observation_space + return data diff --git a/rllib/connectors/env_to_module/mean_std_filter.py b/rllib/connectors/env_to_module/mean_std_filter.py index e4709aff5b44e..c0bdf8bc65447 100644 --- a/rllib/connectors/env_to_module/mean_std_filter.py +++ b/rllib/connectors/env_to_module/mean_std_filter.py @@ -1,7 +1,7 @@ from typing import Any, Dict, List, Optional -from gymnasium.spaces import Discrete, MultiDiscrete import gymnasium as gym +from gymnasium.spaces import Discrete, MultiDiscrete import numpy as np import tree @@ -121,13 +121,10 @@ def __call__( sa_obs, update=self._update_stats ) sa_episode.set_observations(at_indices=-1, new_data=normalized_sa_obs) - - if len(sa_episode) == 0: - # TODO (sven): This is kind of a hack. - # We set the Episode's observation space to ours so that we can safely - # set the last obs to the new value (without causing a space mismatch - # error). 
- sa_episode.observation_space = self.observation_space + # We set the Episode's observation space to ours so that we can safely + # set the last obs to the new value (without causing a space mismatch + # error). + sa_episode.observation_space = self.observation_space # Leave `data` as is. RLlib's default connector will automatically # populate the OBS column therein from the episodes' now transformed diff --git a/rllib/connectors/env_to_module/prev_actions_prev_rewards.py b/rllib/connectors/env_to_module/prev_actions_prev_rewards.py index 5a0222fceb0cd..5b26cd1f8b872 100644 --- a/rllib/connectors/env_to_module/prev_actions_prev_rewards.py +++ b/rllib/connectors/env_to_module/prev_actions_prev_rewards.py @@ -5,14 +5,13 @@ import numpy as np from ray.rllib.connectors.connector_v2 import ConnectorV2 -from ray.rllib.core.columns import Columns from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override from ray.rllib.utils.spaces.space_utils import batch, flatten_to_single_ndarray from ray.rllib.utils.typing import EpisodeType -class PrevActionsPrevRewardsConnector(ConnectorV2): +class PrevActionsPrevRewards(ConnectorV2): """A connector piece that adds previous rewards and actions to the input obs. - Requires Columns.OBS to be already a part of the batch. @@ -36,13 +35,11 @@ class PrevActionsPrevRewardsConnector(ConnectorV2): """ ORIG_OBS_KEY = "_orig_obs" - PREV_ACTIONS_KEY = "prev_actions" - PREV_REWARDS_KEY = "prev_rewards" + PREV_ACTIONS_KEY = "prev_n_actions" + PREV_REWARDS_KEY = "prev_n_rewards" @override(ConnectorV2) def recompute_observation_space_from_input_spaces(self): - if self.input_action_space is None: - return None if self._multi_agent: ret = {} for agent_id, obs_space in self.input_observation_space.spaces.items(): @@ -64,7 +61,7 @@ def __init__( n_prev_rewards: int = 1, **kwargs, ): - """Initializes a PrevActionsPrevRewardsConnector instance. + """Initializes a PrevActionsPrevRewards instance. Args: multi_agent: Whether this is a connector operating on a multi-agent @@ -108,23 +105,16 @@ def __call__( shared_data: Optional[dict] = None, **kwargs, ) -> Any: - observations = data.get(Columns.OBS) - - if observations is None: - raise ValueError( - f"`batch` must already have a column named {Columns.OBS} in it " - f"for this connector to work!" - ) - - new_obs = [] - for sa_episode, orig_obs in self.single_agent_episode_iterator( - episodes, zip_with_batch_column=observations + for sa_episode in self.single_agent_episode_iterator( + episodes, agents_that_stepped_only=True ): # Episode is not finalized yet and thus still operates on lists of items. assert not sa_episode.is_finalized + augmented_obs = {self.ORIG_OBS_KEY: sa_episode.get_observations(-1)} + if self.n_prev_actions: - prev_n_actions = flatten_to_single_ndarray( + augmented_obs[self.PREV_ACTIONS_KEY] = flatten_to_single_ndarray( batch( sa_episode.get_actions( indices=slice(-self.n_prev_actions, None), @@ -135,28 +125,19 @@ def __call__( ) if self.n_prev_rewards: - prev_n_rewards = np.array( + augmented_obs[self.PREV_REWARDS_KEY] = np.array( sa_episode.get_rewards( indices=slice(-self.n_prev_rewards, None), fill=0.0, ) ) - new_obs.append( - { - self.ORIG_OBS_KEY: orig_obs, - self.PREV_ACTIONS_KEY: prev_n_actions, - self.PREV_REWARDS_KEY: prev_n_rewards, - } - ) - - # Convert the observations in the batch into a dict with the keys: - # "_obs", "_prev_rewards", and "_prev_actions". 
- self.foreach_batch_item_change_in_place( - batch=data, - column=Columns.OBS, - func=lambda orig_obs, eps_id, agent_id, module_id: new_obs.pop(0), - ) + # Write new observation directly back into the episode. + sa_episode.set_observations(at_indices=-1, new_data=augmented_obs) + # We set the Episode's observation space to ours so that we can safely + # set the last obs to the new value (without causing a space mismatch + # error). + sa_episode.observation_space = self.observation_space return data diff --git a/rllib/examples/actions/__init__.py b/rllib/examples/actions/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/rllib/examples/connectors/nested_action_spaces.py b/rllib/examples/actions/nested_action_spaces.py similarity index 88% rename from rllib/examples/connectors/nested_action_spaces.py rename to rllib/examples/actions/nested_action_spaces.py index 830b87fb25fb0..db7ad434c6743 100644 --- a/rllib/examples/connectors/nested_action_spaces.py +++ b/rllib/examples/actions/nested_action_spaces.py @@ -1,11 +1,7 @@ from gymnasium.spaces import Dict, Tuple, Box, Discrete, MultiDiscrete from ray.tune.registry import register_env -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) +from ray.rllib.connectors.env_to_module import FlattenObservations from ray.rllib.examples.envs.classes.multi_agent import ( MultiAgentNestedSpaceRepeatAfterMeEnv, ) @@ -26,13 +22,13 @@ if __name__ == "__main__": args = parser.parse_args() + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + # Define env-to-module-connector pipeline for the new stack. def _env_to_module_pipeline(env): - return [ - AddObservationsFromEpisodesToBatch(), - FlattenObservations(multi_agent=args.num_agents > 0), - WriteObservationsToEpisodes(), - ] + return FlattenObservations(multi_agent=args.num_agents > 0) # Register our environment with tune. if args.num_agents > 0: diff --git a/rllib/examples/checkpoints/checkpoint_by_custom_criteria.py b/rllib/examples/checkpoints/checkpoint_by_custom_criteria.py index 0419a8ae1512e..33204e52d5e94 100644 --- a/rllib/examples/checkpoints/checkpoint_by_custom_criteria.py +++ b/rllib/examples/checkpoints/checkpoint_by_custom_criteria.py @@ -1,16 +1,16 @@ """Example extracting a checkpoint from n trials using one or more custom criteria. This example: -- runs a CartPole experiment with three different learning rates (three tune -"trials"). During the experiment, for each trial, we create a checkpoint at each -iteration. -- at the end of the experiment, we compare the trials and pick the one that performed -best, based on the criterion: Lowest episode count per single iteration (for CartPole, -a low episode count means the episodes are very long and thus the reward is also very -high). -- from that best trial (with the lowest episode count), we then pick those checkpoints -that a) have the lowest policy loss (good) and b) have the highest value function loss -(bad). + - runs a CartPole experiment with three different learning rates (three tune + "trials"). During the experiment, for each trial, we create a checkpoint at each + iteration. + - at the end of the experiment, we compare the trials and pick the one that + performed best, based on the criterion: Lowest episode count per single iteration + (for CartPole, a low episode count means the episodes are very long and thus the + reward is also very high). 
+ - from that best trial (with the lowest episode count), we then pick those + checkpoints that a) have the lowest policy loss (good) and b) have the highest value + function loss (bad). How to run this script diff --git a/rllib/examples/checkpoints/continue_training_from_checkpoint.py b/rllib/examples/checkpoints/continue_training_from_checkpoint.py index a8400659d9604..c52a7868b4e8e 100644 --- a/rllib/examples/checkpoints/continue_training_from_checkpoint.py +++ b/rllib/examples/checkpoints/continue_training_from_checkpoint.py @@ -4,15 +4,16 @@ and you would therefore like to make your setup more robust and fault-tolerant. This example: -- runs a single- or multi-agent CartPole experiment (for multi-agent, we use different -learning rates) thereby checkpointing the state of the Algorithm every n iterations. -- stops the experiment due to an expected crash in the algorithm's main process after -a certain number of iterations. -- just for testing purposes, restores the entire algorithm from the latest checkpoint -and checks, whether the state of the restored algo exactly match the state of the -crashed one. -- then continues training with the restored algorithm until the desired final episode -return is reached. + - runs a single- or multi-agent CartPole experiment (for multi-agent, we use + different learning rates) thereby checkpointing the state of the Algorithm every n + iterations. + - stops the experiment due to an expected crash in the algorithm's main process + after a certain number of iterations. + - just for testing purposes, restores the entire algorithm from the latest + checkpoint and checks, whether the state of the restored algo exactly match the + state of the crashed one. + - then continues training with the restored algorithm until the desired final + episode return is reached. How to run this script diff --git a/rllib/examples/checkpoints/restore_1_of_n_agents_from_checkpoint.py b/rllib/examples/checkpoints/restore_1_of_n_agents_from_checkpoint.py index fb53e2cb876f1..bf6889113fed3 100644 --- a/rllib/examples/checkpoints/restore_1_of_n_agents_from_checkpoint.py +++ b/rllib/examples/checkpoints/restore_1_of_n_agents_from_checkpoint.py @@ -4,12 +4,13 @@ This example: - Runs a multi-agent `Pendulum-v1` experiment with >= 2 policies. - Saves a checkpoint of the `MultiAgentRLModule` used every `--checkpoint-freq` - iterations. - - Stops the experiments after the agents reach a combined return of `-800`. + iterations. + - Stops the experiments after the agents reach a combined return of -800. - Picks the best checkpoint by combined return and restores policy 0 from it. - Runs a second experiment with the restored `RLModule` for policy 0 and a fresh `RLModule` for the other policies. - - Stops the second experiment after the agents reach a combined return of `-800`. + - Stops the second experiment after the agents reach a combined return of -800. 
+ + How to run this script ---------------------- @@ -34,6 +35,7 @@ `--wandb-key=[your WandB API key] --wandb-project=[some project name] --wandb-run-name=[optional: WandB run name (within the defined project)]` + Results to expect ----------------- You should expect a reward of -400.0 eventually being achieved by a simple diff --git a/rllib/examples/connectors/flatten_observations_dict_space.py b/rllib/examples/connectors/flatten_observations_dict_space.py new file mode 100644 index 0000000000000..bed31ce5ac284 --- /dev/null +++ b/rllib/examples/connectors/flatten_observations_dict_space.py @@ -0,0 +1,157 @@ +"""Example using a ConnectorV2 to flatten arbitrarily nested dict or tuple observations. + +An RLlib Algorithm has 3 distinct connector pipelines: +- An env-to-module pipeline in an EnvRunner accepting a list of episodes and producing +a batch for an RLModule to compute actions (`forward_inference()` or +`forward_exploration()`). +- A module-to-env pipeline in an EnvRunner taking the RLModule's output and converting +it into an action readable by the environment. +- A learner connector pipeline on a Learner taking a list of episodes and producing +a batch for an RLModule to perform the training forward pass (`forward_train()`). + +Each of these pipelines has a fixed set of default ConnectorV2 pieces that RLlib +adds/prepends to these pipelines in order to perform the most basic functionalities. +For example, RLlib adds the `AddObservationsFromEpisodesToBatch` ConnectorV2 into any +env-to-module pipeline to make sure the batch for computing actions contains - at the +minimum - the most recent observation. + +On top of these default ConnectorV2 pieces, users can define their own ConnectorV2 +pieces (or use the ones available already in RLlib) and add them to one of the 3 +different pipelines described above, as required. + +This example: + - shows how the `FlattenObservations` ConnectorV2 piece can be added to the + env-to-module pipeline. + - demonstrates that by using this connector, any arbitrarily nested dict or tuple + observation is properly flattened into a simple 1D tensor, for easier RLModule + processing. + - shows how - in a multi-agent setup - individual agents can be specified, whose + observations should be flattened (while other agents' observations will always + be left as-is). + - uses a variant of the CartPole-v1 environment, in which the 4 observation items + (x-pos, x-veloc, angle, and angle-veloc) are taken apart and put into a nested dict + with the structure: + { + "x-pos": [x-pos], + "angular-pos": { + "value": [angle], + "some_random_stuff": [random Discrete(3)], # <- should be ignored by algo + }, + "velocs": Tuple([x-veloc], [angle-veloc]), + } + + +How to run this script +---------------------- +`python [script file name].py --enable-new-api-stack` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging.
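The following standalone sketch (not part of this script) illustrates what this flattening does, using the same `flatten_inputs_to_1d_tensor` utility that the `FlattenObservations` connector relies on internally. The concrete spaces below are only illustrative stand-ins for the ones defined in `CartPoleWithDictObservationSpace`:

import gymnasium as gym
import numpy as np

from ray.rllib.utils.numpy import flatten_inputs_to_1d_tensor
from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space

# Illustrative nested observation space (the real one lives in
# CartPoleWithDictObservationSpace and may use different bounds).
obs_space = gym.spaces.Dict({
    "x-pos": gym.spaces.Box(-4.8, 4.8, (1,), np.float32),
    "angular-pos": gym.spaces.Dict({
        "value": gym.spaces.Box(-0.42, 0.42, (1,), np.float32),
        "some_random_stuff": gym.spaces.Discrete(3),
    }),
    "velocs": gym.spaces.Tuple((
        gym.spaces.Box(-10.0, 10.0, (1,), np.float32),
        gym.spaces.Box(-10.0, 10.0, (1,), np.float32),
    )),
})

obs = obs_space.sample()
flat = flatten_inputs_to_1d_tensor(
    inputs=obs,
    spaces_struct=get_base_struct_from_space(obs_space),
    # A single observation item, no batch axis present.
    batch_axis=False,
)
# `flat` is a single 1D np.ndarray; Discrete components get one-hot encoded
# (compare the doctest in rllib/connectors/env_to_module/flatten_observations.py),
# so here: 1 + 1 + 3 + 1 + 1 = 7 values.
print(flat.shape)  # -> (7,)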
+ +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- + ++---------------------+------------+----------------+--------+------------------+ +| Trial name | status | loc | iter | total time (s) | +| | | | | | +|---------------------+------------+----------------+--------+------------------+ +| PPO_env_a2fd6_00000 | TERMINATED | 127.0.0.1:7409 | 25 | 24.1426 | ++---------------------+------------+----------------+--------+------------------+ +------------------------+------------------------+------------------------+ + num_env_steps_sample | num_env_steps_traine | episode_return_mean | + d_lifetime | d_lifetime | | +------------------------+------------------------+------------------------| + 100000 | 100000 | 421.42 | +------------------------+------------------------+------------------------+ +""" +from ray.tune.registry import register_env +from ray.rllib.connectors.env_to_module import FlattenObservations +from ray.rllib.examples.envs.classes.cartpole_with_dict_observation_space import ( + CartPoleWithDictObservationSpace, +) +from ray.rllib.examples.envs.classes.multi_agent import ( + MultiAgentCartPoleWithDictObservationSpace, +) +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) +from ray.tune.registry import get_trainable_cls + + +# Read in common example script command line arguments. +parser = add_rllib_example_script_args(default_timesteps=200000, default_reward=400.0) + + +if __name__ == "__main__": + args = parser.parse_args() + + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + + # Define env-to-module-connector pipeline for the new stack. + def _env_to_module_pipeline(env): + return FlattenObservations(multi_agent=args.num_agents > 0) + + # Register our environment with tune. + if args.num_agents > 0: + register_env( + "env", + lambda _: MultiAgentCartPoleWithDictObservationSpace( + config={"num_agents": args.num_agents} + ), + ) + else: + register_env("env", lambda _: CartPoleWithDictObservationSpace()) + + # Define the AlgorithmConfig used. + config = ( + get_trainable_cls(args.algo) + .get_default_config() + .environment("env") + .env_runners(env_to_module_connector=_env_to_module_pipeline) + .training( + gamma=0.99, + lr=0.0003, + ) + ) + if args.enable_new_api_stack: + config = config.rl_module( + model_config_dict={ + "fcnet_hiddens": [32], + "fcnet_activation": "linear", + "vf_share_layers": True, + "uses_new_env_runners": True, + }, + ) + else: + config = config.training( + model=dict( + fcnet_hiddens=[32], fcnet_activation="linear", vf_share_layers=True + ) + ) + + # Add a simple multi-agent setup. + if args.num_agents > 0: + config = config.multi_agent( + policies={f"p{i}" for i in range(args.num_agents)}, + policy_mapping_fn=lambda aid, *a, **kw: f"p{aid}", + ) + + # Fix some PPO-specific settings. + if args.algo == "PPO": + config = config.training( + num_sgd_iter=6, + vf_loss_coeff=0.01, + ) + + # Run everything as configured. 
+ run_rllib_example_script_experiment(config, args) diff --git a/rllib/examples/connectors/frame_stacking.py b/rllib/examples/connectors/frame_stacking.py index 6abce5582b0b8..e26918796ff43 100644 --- a/rllib/examples/connectors/frame_stacking.py +++ b/rllib/examples/connectors/frame_stacking.py @@ -1,15 +1,81 @@ -""" Example using connectors (V2) for frame-stacking in Atari environments. +"""Example using 2 ConnectorV2 for observation frame-stacking in Atari environments. + +An RLlib Algorithm has 3 distinct connector pipelines: +- An env-to-module pipeline in an EnvRunner accepting a list of episodes and producing +a batch for an RLModule to compute actions (`forward_inference()` or +`forward_exploration()`). +- A module-to-env pipeline in an EnvRunner taking the RLModule's output and converting +it into an action readable by the environment. +- A learner connector pipeline on a Learner taking a list of episodes and producing +a batch for an RLModule to perform the training forward pass (`forward_train()`). + +Each of these pipelines has a fixed set of default ConnectorV2 pieces that RLlib +adds/prepends to these pipelines in order to perform the most basic functionalities. +For example, RLlib adds the `AddObservationsFromEpisodesToBatch` ConnectorV2 into any +env-to-module pipeline to make sure the batch for computing actions contains - at the +minimum - the most recent observation. + +On top of these default ConnectorV2 pieces, users can define their own ConnectorV2 +pieces (or use the ones available already in RLlib) and add them to one of the 3 +different pipelines described above, as required. + +This example: + - shows how the `FrameStackingEnvToModule` ConnectorV2 piece can be added to the + env-to-module pipeline. + - shows how the `FrameStackingLearner` ConnectorV2 piece can be added to the + learner connector pipeline. + - demonstrates that using these two pieces (rather than performing framestacking + already inside the environment using a gymnasium wrapper) increases overall + performance by about 5%. + How to run this script ---------------------- -`python [script file name].py --enable-new-api-stack` +`python [script file name].py --enable-new-api-stack --num-frames=4 --env=ALE/Pong-v5` + +Use the `--num-frames` option to define the number of observations to framestack. +If you don't want to use Connectors to perform the framestacking, set the +`--use-gym-wrapper-framestacking` flag to perform framestacking already inside a +gymnasium observation wrapper. In this case though, be aware that the tensors being +sent through the network are `--num-frames` x larger than if you use the Connector +setup. + For debugging, use the following additional command line options `--no-tune --num-env-runners=0` which should allow you to set breakpoints anywhere in the RLlib code and have the execution stop there for inspection and debugging. + For logging to your WandB account, use: `--wandb-key=[your WandB API key] --wandb-project=[some project name] --wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- + +With `--num-frames=4` and using the two extra ConnectorV2 pieces (in the env-to-module +and learner connector pipelines), you should see something like this using: +`--env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95` ++---------------------------+------------+--------+------------------+... +| Trial name | status | iter | total time (s) | +| | | | | +|---------------------------+------------+--------+------------------+... 
+| PPO_atari-env_2fc4a_00000 | TERMINATED | 200 | 335.837 | ++---------------------------+------------+--------+------------------+... + +Note that the time to run these 200 iterations is about 5% shorter than when +performing framestacking already inside the environment (using a +`gymnasium.wrappers.ObservationWrapper`), because the wrapper-based setup incurs +additional network traffic (sending back 4x[obs] batches instead of 1x[obs] to the learners). + +Thus, with the `--use-gym-wrapper-framestacking` option (all other options being equal), +the output looks like this: ++---------------------------+------------+--------+------------------+... +| Trial name | status | iter | total time (s) | +| | | | | +|---------------------------+------------+--------+------------------+... +| PPO_atari-env_2fc4a_00000 | TERMINATED | 200 | 351.505 | ++---------------------------+------------+--------+------------------+... """ import gymnasium as gym @@ -27,12 +93,8 @@ parser = add_rllib_example_script_args( default_timesteps=5000000, default_reward=20.0, default_iters=200 ) -parser.add_argument( - "--atari-env", - type=str, - default="ALE/Pong-v5", - help="The name of the Atari env to run, e.g. `ALE/Breakout-v5`.", -) +# Use Pong by default. +parser.set_defaults(env="ALE/Pong-v5") parser.add_argument( "--num-frames", type=int, @@ -52,12 +114,16 @@ args = parser.parse_args() + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + # Define our custom connector pipelines. def _make_env_to_module_connector(env): # Create the env-to-module connector. We return an individual connector piece - # here, which RLlib will then automatically integrate into a pipeline (and + # here, which RLlib automatically integrates into a pipeline (and # add its default connector piece to the end of that pipeline). - # This pipeline also automatically fixes the input- and output spaces of the + # The default pipeline automatically fixes the input- and output spaces of the # individual connector pieces in it. # Note that since the frame stacking connector does NOT write information # back to the episode (in order to save memory and network traffic), we @@ -79,29 +145,29 @@ def _make_learner_connector(input_observation_space, input_action_space): # We would like our frame stacking connector to do this job. def _env_creator(cfg): return wrap_atari_for_new_api_stack( - gym.make(args.atari_env, **cfg, **{"render_mode": "rgb_array"}), + gym.make(args.env, **cfg, **{"render_mode": "rgb_array"}), # Perform framestacking either through ConnectorV2 or right here through # the observation wrapper. framestack=( - args.num_framestack if args.use_gym_wrapper_framestacking else None + args.num_frames if args.use_gym_wrapper_framestacking else None ), ) if args.num_agents > 0: tune.register_env( - "env", + "atari-env", lambda cfg: make_multi_agent(_env_creator)( dict(cfg, **{"num_agents": args.num_agents}) ), ) else: - tune.register_env("env", _env_creator) + tune.register_env("atari-env", _env_creator) base_config = ( get_trainable_cls(args.algo) .get_default_config() .environment( - "env", + "atari-env", env_config={ # Make analogous to old v4 + NoFrameskip.
"frameskip": 1, @@ -135,9 +201,7 @@ def _env_creator(cfg): grad_clip=100.0, grad_clip_by="global_norm", ) - ) - if args.enable_new_api_stack: - base_config.rl_module( + .rl_module( model_config_dict=dict( { "vf_share_layers": True, @@ -148,16 +212,7 @@ def _env_creator(cfg): }, ) ) - else: - base_config.training( - model={ - "vf_share_layers": True, - "conv_filters": [[16, 4, 2], [32, 4, 2], [64, 4, 2], [128, 4, 2]], - "conv_activation": "relu", - "post_fcnet_hiddens": [256], - "uses_new_env_runners": False, - } - ) + ) # Add a simple multi-agent setup. if args.num_agents > 0: diff --git a/rllib/examples/connectors/mean_std_filtering.py b/rllib/examples/connectors/mean_std_filtering.py index a30d6e399c00c..470812585138b 100644 --- a/rllib/examples/connectors/mean_std_filtering.py +++ b/rllib/examples/connectors/mean_std_filtering.py @@ -1,13 +1,81 @@ -from ray.air.constants import TRAINING_ITERATION +"""Example using a ConnectorV2 for processing observations with a mean/std filter. + +An RLlib Algorithm has 3 distinct connector pipelines: +- An env-to-module pipeline in an EnvRunner accepting a list of episodes and producing +a batch for an RLModule to compute actions (`forward_inference()` or +`forward_exploration()`). +- A module-to-env pipeline in an EnvRunner taking the RLModule's output and converting +it into an action readable by the environment. +- A learner connector pipeline on a Learner taking a list of episodes and producing +a batch for an RLModule to perform the training forward pass (`forward_train()`). + +Each of these pipelines has a fixed set of default ConnectorV2 pieces that RLlib +adds/prepends to these pipelines in order to perform the most basic functionalities. +For example, RLlib adds the `AddObservationsFromEpisodesToBatch` ConnectorV2 into any +env-to-module pipeline to make sure the batch for computing actions contains - at the +minimum - the most recent observation. + +On top of these default ConnectorV2 pieces, users can define their own ConnectorV2 +pieces (or use the ones available already in RLlib) and add them to one of the 3 +different pipelines described above, as required. + +This example: + - shows how the `MeanStdFilter` ConnectorV2 piece can be added to the env-to-module + pipeline. + - demonstrates that using such a filter enhances learning behavior (or even makes + it possible to learn at all) in some environments, especially those with lopsided + observation spaces, for example `Box(-3000, -1000, ...)`. + + +How to run this script +---------------------- +`python [script file name].py --enable-new-api-stack` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging.
+ +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- +Running this example with the mean-std filter results in the normally expected Pendulum +learning behavior: ++-------------------------------+------------+-----------------+--------+ +| Trial name | status | loc | iter | +| | | | | +|-------------------------------+------------+-----------------+--------+ +| PPO_lopsided-pend_f9c96_00000 | TERMINATED | 127.0.0.1:43612 | 77 | ++-------------------------------+------------+-----------------+--------+ ++------------------+------------------------+-----------------------+ +| total time (s) | num_env_steps_sample | episode_return_mean | +| | d_lifetime | | +|------------------+------------------------+-----------------------| +| 30.7466 | 40040 | -276.3 | ++------------------+------------------------+-----------------------+ + +If you try using the `--disable-mean-std-filter` (all other things being equal), you +will either see no learning progress at all (or a very slow one), but more likely some +numerical instability related error will be thrown: + +ValueError: Expected parameter loc (Tensor of shape (64, 1)) of distribution + Normal(loc: torch.Size([64, 1]), scale: torch.Size([64, 1])) to satisfy the + constraint Real(), but found invalid values: +tensor([[nan], + [nan], + [nan], + ... +""" +import gymnasium as gym +import numpy as np + from ray.rllib.connectors.env_to_module.mean_std_filter import MeanStdFilter from ray.rllib.examples.envs.classes.multi_agent import MultiAgentPendulum from ray.rllib.utils.framework import try_import_torch -from ray.rllib.utils.metrics import ( - ENV_RUNNER_RESULTS, - EPISODE_RETURN_MEAN, - EVALUATION_RESULTS, - NUM_ENV_STEPS_SAMPLED_LIFETIME, -) from ray.rllib.utils.test_utils import ( add_rllib_example_script_args, run_rllib_example_script_experiment, @@ -21,22 +89,43 @@ default_timesteps=500000, default_reward=-300.0, ) +parser.add_argument( + "--disable-mean-std-filter", + action="store_true", + help="Run w/o a mean/std env-to-module connector piece (filter).", +) + + +class LopsidedObs(gym.ObservationWrapper): + def __init__(self, env): + super().__init__(env) + self.observation_space = gym.spaces.Box(-4000.0, -1456.0, (3,), np.float32) + + def observation(self, observation): + # Lopside [-1.0, 1.0] Pendulum observations + return ((observation + 1.0) / 2.0) * (4000.0 - 1456.0) - 4000.0 if __name__ == "__main__": args = parser.parse_args() + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + # Register our environment with tune. if args.num_agents > 0: register_env( - "env", + "lopsided-pend", lambda _: MultiAgentPendulum(config={"num_agents": args.num_agents}), ) + else: + register_env("lopsided-pend", lambda _: LopsidedObs(gym.make("Pendulum-v1"))) config = ( get_trainable_cls(args.algo) .get_default_config() - .environment("env" if args.num_agents > 0 else "Pendulum-v1") + .environment("lopsided-pend") .env_runners( # TODO (sven): MAEnvRunner does not support vectorized envs yet # due to gym's env checkers and non-compatability with RLlib's @@ -48,7 +137,9 @@ # included in an automatically generated EnvToModulePipeline or return a # EnvToModulePipeline directly. 
env_to_module_connector=( - lambda env: MeanStdFilter(multi_agent=args.num_agents > 0) + None + if args.disable_mean_std_filter + else lambda env: MeanStdFilter(multi_agent=args.num_agents > 0) ), ) .training( @@ -61,25 +152,7 @@ vf_clip_param=10.0, vf_loss_coeff=0.01, ) - .evaluation( - evaluation_num_env_runners=1, - evaluation_parallel_to_training=True, - evaluation_interval=1, - evaluation_duration=10, - evaluation_duration_unit="episodes", - evaluation_config={ - "explore": False, - # Do NOT use the eval EnvRunners' ConnectorV2 states. Instead, before - # each round of evaluation, broadcast the latest training - # EnvRunnerGroup's ConnectorV2 states (merged from all training remote - # EnvRunners) to the eval EnvRunnerGroup (and discard the eval - # EnvRunners' stats). - "use_worker_filter_stats": False, - }, - ) - ) - if args.enable_new_api_stack: - config = config.rl_module( + .rl_module( model_config_dict={ "fcnet_activation": "relu", "fcnet_weights_initializer": torch.nn.init.xavier_uniform_, @@ -88,17 +161,27 @@ "uses_new_env_runners": True, } ) - else: - config = config.training( - model=dict( - { - "fcnet_activation": "relu", - "fcnet_weights_initializer": torch.nn.init.xavier_uniform_, - "fcnet_bias_initializer": torch.nn.init.constant_, - "fcnet_bias_initializer_config": {"val": 0.0}, - } - ) - ) + # In case you would like to run with evaluation EnvRunners, make sure your + # `evaluation_config` key contains the `use_worker_filter_stats=False` setting + # (see below). This setting makes sure that the mean/std stats collected by the + # evaluation EnvRunners are NOT used for the training EnvRunners (unless you + # really want to mix these stats). It's normally a good idea to keep the stats + # collected during evaluation completely out of the training data (if only for + # better reproducibility). + # .evaluation( + # evaluation_num_env_runners=1, + # evaluation_interval=1, + # evaluation_config={ + # "explore": False, + # # Do NOT use the eval EnvRunners' ConnectorV2 states. Instead, before + # # each round of evaluation, broadcast the latest training + # # EnvRunnerGroup's ConnectorV2 states (merged from all training remote + # # EnvRunners) to the eval EnvRunnerGroup (and discard the eval + # # EnvRunners' stats). + # "use_worker_filter_stats": False, + # }, + # ) + ) # Add a simple multi-agent setup.
if args.num_agents > 0: @@ -107,12 +190,4 @@ policy_mapping_fn=lambda aid, *a, **kw: f"p{aid}", ) - stop = { - TRAINING_ITERATION: args.stop_iters, - f"{EVALUATION_RESULTS}/{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}": ( - args.stop_reward - ), - NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps, - } - - run_rllib_example_script_experiment(config, args, stop=stop) + run_rllib_example_script_experiment(config, args) diff --git a/rllib/examples/connectors/nested_observation_spaces.py b/rllib/examples/connectors/nested_observation_spaces.py deleted file mode 100644 index 39a4bac1c585e..0000000000000 --- a/rllib/examples/connectors/nested_observation_spaces.py +++ /dev/null @@ -1,88 +0,0 @@ -from ray.tune.registry import register_env -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) -from ray.rllib.examples.envs.classes.cartpole_with_dict_observation_space import ( - CartPoleWithDictObservationSpace, -) -from ray.rllib.examples.envs.classes.multi_agent import ( - MultiAgentCartPoleWithDictObservationSpace, -) -from ray.rllib.utils.test_utils import ( - add_rllib_example_script_args, - run_rllib_example_script_experiment, -) -from ray.tune.registry import get_trainable_cls - - -# Read in common example script command line arguments. -parser = add_rllib_example_script_args(default_timesteps=200000, default_reward=400.0) - - -if __name__ == "__main__": - args = parser.parse_args() - - # Define env-to-module-connector pipeline for the new stack. - def _env_to_module_pipeline(env): - return [ - AddObservationsFromEpisodesToBatch(), - FlattenObservations(multi_agent=args.num_agents > 0), - WriteObservationsToEpisodes(), - ] - - # Register our environment with tune. - if args.num_agents > 0: - register_env( - "env", - lambda _: MultiAgentCartPoleWithDictObservationSpace( - config={"num_agents": args.num_agents} - ), - ) - else: - register_env("env", lambda _: CartPoleWithDictObservationSpace()) - - # Define the AlgorithmConfig used. - config = ( - get_trainable_cls(args.algo) - .get_default_config() - .environment("env") - .env_runners(env_to_module_connector=_env_to_module_pipeline) - .training( - gamma=0.99, - lr=0.0003, - ) - ) - if args.enable_new_api_stack: - config = config.rl_module( - model_config_dict={ - "fcnet_hiddens": [32], - "fcnet_activation": "linear", - "vf_share_layers": True, - "uses_new_env_runners": True, - }, - ) - else: - config = config.training( - model=dict( - fcnet_hiddens=[32], fcnet_activation="linear", vf_share_layers=True - ) - ) - - # Add a simple multi-agent setup. - if args.num_agents > 0: - config = config.multi_agent( - policies={f"p{i}" for i in range(args.num_agents)}, - policy_mapping_fn=lambda aid, *a, **kw: f"p{aid}", - ) - - # Fix some PPO-specific settings. - if args.algo == "PPO": - config = config.training( - num_sgd_iter=6, - vf_loss_coeff=0.01, - ) - - # Run everything as configured. - run_rllib_example_script_experiment(config, args) diff --git a/rllib/examples/connectors/prev_actions_prev_rewards.py b/rllib/examples/connectors/prev_actions_prev_rewards.py index 0c3a2693cca27..dcee6ac5689eb 100644 --- a/rllib/examples/connectors/prev_actions_prev_rewards.py +++ b/rllib/examples/connectors/prev_actions_prev_rewards.py @@ -1,11 +1,89 @@ +"""Example using a ConnectorV2 to add previous rewards/actions to an RLModule's input. 
+ +An RLlib Algorithm has 3 distinct connector pipelines: +- An env-to-module pipeline in an EnvRunner accepting a list of episodes and producing +a batch for an RLModule to compute actions (`forward_inference()` or +`forward_exploration()`). +- A module-to-env pipeline in an EnvRunner taking the RLModule's output and converting +it into an action readable by the environment. +- A learner connector pipeline on a Learner taking a list of episodes and producing +a batch for an RLModule to perform the training forward pass (`forward_train()`). + +Each of these pipelines has a fixed set of default ConnectorV2 pieces that RLlib +adds/prepends to these pipelines in order to perform the most basic functionalities. +For example, RLlib adds the `AddObservationsFromEpisodesToBatch` ConnectorV2 into any +env-to-module pipeline to make sure the batch for computing actions contains - at the +minimum - the most recent observation. + +On top of these default ConnectorV2 pieces, users can define their own ConnectorV2 +pieces (or use the ones available already in RLlib) and add them to one of the 3 +different pipelines described above, as required. + +This example: + - shows how the `PrevActionsPrevRewards` ConnectorV2 piece can be added to the + env-to-module pipeline to extract previous rewards and/or actions from the ongoing + episodes. + - shows how this connector creates and wraps this new information (rewards and + actions) together with the original observations into the RLModule's input dict + under a new `gym.spaces.Dict` structure (for example, if your observation space + is `O=Box(shape=(3,))` and you add the most recent 1 reward, the new observation + space will be `Dict({"_orig_obs": O, "prev_n_rewards": Box(shape=())})`). + - demonstrates how to use RLlib's `FlattenObservations` right after the + `PrevActionsPrevRewards` to flatten that new dict observation structure again into + a single 1D tensor. + - uses the StatelessCartPole environment, a CartPole-v1 derivative that's missing + both x-veloc and angle-veloc observation components and is therefore non-Markovian + (only partially observable). An LSTM default model is used for training. Adding + the additional context to the observations (for example, prev. actions) helps the + LSTM to more quickly learn in this environment. + + +How to run this script +---------------------- +`python [script file name].py --enable-new-api-stack` + +Use the `--n-prev-actions` and `--n-prev-rewards` options to define how many of the +most recent actions and rewards should be added to each observation. + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging.
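To make the wrapping concrete, here is a rough, illustrative sketch (values made up; key names taken from the `PrevActionsPrevRewards` connector) of a single augmented observation as it is written back into an ongoing episode, assuming a StatelessCartPole observation, one previous action, and two previous rewards:

import numpy as np

# Illustrative only: a StatelessCartPole observation (x-pos and angle) augmented
# with the most recent action and the 2 most recent rewards. The connector writes
# a dict like this back into the episode as the new last observation.
augmented_obs = {
    "_orig_obs": np.array([0.03, -0.12], np.float32),    # original env observation
    "prev_n_actions": np.array([1]),                      # most recent (Discrete) action
    "prev_n_rewards": np.array([1.0, 1.0], np.float32),   # 2 most recent rewards
}
# The FlattenObservations piece that follows in the same env-to-module pipeline
# then flattens this dict into a single 1D tensor before it reaches the RLModule.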
+ +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- + +You should see something similar to this in your terminal output when running +ths script as described above: + ++---------------------+------------+-----------------+--------+------------------+ +| Trial name | status | loc | iter | total time (s) | +| | | | | | +|---------------------+------------+-----------------+--------+------------------+ +| PPO_env_0edd2_00000 | TERMINATED | 127.0.0.1:12632 | 17 | 42.6898 | ++---------------------+------------+-----------------+--------+------------------+ ++------------------------+------------------------+------------------------+ +| num_env_steps_sample | num_env_steps_traine | episode_return_mean | +| d_lifetime | d_lifetime | | +|------------------------+------------------------+------------------------| +| 68000 | 68000 | 205.22 | ++------------------------+------------------------+------------------------+ +""" import functools from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, FlattenObservations, - PrevActionsPrevRewardsConnector, - WriteObservationsToEpisodes, + PrevActionsPrevRewards, ) from ray.rllib.examples.envs.classes.stateless_cartpole import StatelessCartPole from ray.rllib.examples.envs.classes.multi_agent import MultiAgentStatelessCartPole @@ -29,18 +107,22 @@ if __name__ == "__main__": args = parser.parse_args() + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + # Define our custom connector pipelines. def _env_to_module(env): # Create the env-to-module connector pipeline. return [ - AddObservationsFromEpisodesToBatch(), - PrevActionsPrevRewardsConnector( + # AddObservationsFromEpisodesToBatch(), + PrevActionsPrevRewards( multi_agent=args.num_agents > 0, n_prev_rewards=args.n_prev_rewards, n_prev_actions=args.n_prev_actions, ), FlattenObservations(multi_agent=args.num_agents > 0), - WriteObservationsToEpisodes(), + # WriteObservationsToEpisodes(), ] # Register our environment with tune. @@ -64,10 +146,7 @@ def _env_to_module(env): train_batch_size=4000, vf_loss_coeff=0.01, ) - ) - - if args.enable_new_api_stack: - config = config.rl_module( + .rl_module( model_config_dict={ "use_lstm": True, "max_seq_len": 50, @@ -79,20 +158,7 @@ def _env_to_module(env): "uses_new_env_runners": True, } ) - else: - config = config.training( - model=dict( - { - "use_lstm": True, - "max_seq_len": 50, - "fcnet_hiddens": [32], - "fcnet_activation": "linear", - "vf_share_layers": True, - "fcnet_weights_initializer": nn.init.xavier_uniform_, - "fcnet_bias_initializer": functools.partial(nn.init.constant_, 0.0), - } - ) - ) + ) # Add a simple multi-agent setup. 
if args.num_agents > 0: diff --git a/rllib/examples/curriculum/curriculum_learning.py b/rllib/examples/curriculum/curriculum_learning.py index 1e7ba0250ae08..f6b4e1ab7cf2b 100644 --- a/rllib/examples/curriculum/curriculum_learning.py +++ b/rllib/examples/curriculum/curriculum_learning.py @@ -59,11 +59,7 @@ from ray.air.constants import TRAINING_ITERATION from ray.rllib.algorithms.algorithm import Algorithm from ray.rllib.algorithms.callbacks import DefaultCallbacks -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) +from ray.rllib.connectors.env_to_module import FlattenObservations from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MEAN, @@ -222,11 +218,7 @@ def on_train_result( ) .env_runners( num_envs_per_env_runner=5, - env_to_module_connector=lambda env: [ - AddObservationsFromEpisodesToBatch(), - FlattenObservations(), - WriteObservationsToEpisodes(), - ], + env_to_module_connector=lambda env: FlattenObservations(), ) ) diff --git a/rllib/examples/inference/policy_inference_after_training.py b/rllib/examples/inference/policy_inference_after_training.py index 0f61f4519cd7f..2525d5ca29354 100644 --- a/rllib/examples/inference/policy_inference_after_training.py +++ b/rllib/examples/inference/policy_inference_after_training.py @@ -4,13 +4,13 @@ from a checkpoint and a manual env-loop (CartPole-v1). No ConnectorV2s or EnvRunners are used in this example. -This example shows .. - - .. how to use an already existing checkpoint to extract a single-agent RLModule - from (our policy network). - - .. how to setup this recovered policy net for action computations (with or without - using exploration). - - .. have the policy run through a very simple gymnasium based env-loop, w/o using - RLlib's ConnectorV2s or EnvRunners. +This example: + - shows how to use an already existing checkpoint to extract a single-agent RLModule + (our policy network) from it. + - shows how to set up this recovered policy net for action computations (with or + without using exploration). + - shows how to run the policy through a very simple gymnasium based env-loop, w/o + using RLlib's ConnectorV2s or EnvRunners. How to run this script diff --git a/rllib/examples/inference/policy_inference_after_training_w_connector.py b/rllib/examples/inference/policy_inference_after_training_w_connector.py index 6d97ef61f8657..e4a66ec332660 100644 --- a/rllib/examples/inference/policy_inference_after_training_w_connector.py +++ b/rllib/examples/inference/policy_inference_after_training_w_connector.py @@ -6,14 +6,14 @@ The RLModule contains an LSTM that requires its own previous STATE_OUT as new input at every episode step to compute a new action. -This example shows .. - - .. how to use an already existing checkpoint to extract a single-agent RLModule - from (our policy network). - - .. how to setup this recovered policy net for action computations (with or without - using exploration). - - .. how to create a more complex env-loop in which the action-computing RLModule - requires its own previous state outputs as new input and how to use RLlib's Episode - APIs to achieve this. +This example: + - shows how to use an already existing checkpoint to extract a single-agent RLModule + (our policy network) from it. + - shows how to set up this recovered policy net for action computations (with or + without using exploration).
+ - shows how to create a more complex env-loop in which the action-computing RLModule + requires its own previous state outputs as new input and how to use RLlib's Episode + APIs to achieve this. How to run this script diff --git a/rllib/examples/multi_agent/rock_paper_scissors_heuristic_vs_learned.py b/rllib/examples/multi_agent/rock_paper_scissors_heuristic_vs_learned.py index d503e7f23ad3d..1f7ad8dc238c5 100644 --- a/rllib/examples/multi_agent/rock_paper_scissors_heuristic_vs_learned.py +++ b/rllib/examples/multi_agent/rock_paper_scissors_heuristic_vs_learned.py @@ -33,11 +33,7 @@ from pettingzoo.classic import rps_v2 from ray.air.constants import TRAINING_ITERATION -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) +from ray.rllib.connectors.env_to_module import FlattenObservations from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv @@ -89,10 +85,8 @@ .environment("RockPaperScissors") .env_runners( env_to_module_connector=lambda env: ( - AddObservationsFromEpisodesToBatch(), - # Only flatten obs for the learning RLModul + # `agent_ids=...`: Only flatten obs for the learning RLModule. FlattenObservations(multi_agent=True, agent_ids={"player_0"}), - WriteObservationsToEpisodes(), ), ) .multi_agent( diff --git a/rllib/examples/multi_agent/rock_paper_scissors_learned_vs_learned.py b/rllib/examples/multi_agent/rock_paper_scissors_learned_vs_learned.py index 507c018babc8f..e3e75c9906924 100644 --- a/rllib/examples/multi_agent/rock_paper_scissors_learned_vs_learned.py +++ b/rllib/examples/multi_agent/rock_paper_scissors_learned_vs_learned.py @@ -15,11 +15,7 @@ from pettingzoo.classic import rps_v2 -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) +from ray.rllib.connectors.env_to_module import FlattenObservations from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv @@ -62,11 +58,7 @@ .get_default_config() .environment("RockPaperScissors") .env_runners( - env_to_module_connector=lambda env: ( - AddObservationsFromEpisodesToBatch(), - FlattenObservations(multi_agent=True), - WriteObservationsToEpisodes(), - ), + env_to_module_connector=lambda env: FlattenObservations(multi_agent=True), ) .multi_agent( policies={"p0", "p1"}, diff --git a/rllib/examples/multi_agent/two_step_game_with_grouped_agents.py b/rllib/examples/multi_agent/two_step_game_with_grouped_agents.py index afabd3fe90036..2c94358222905 100644 --- a/rllib/examples/multi_agent/two_step_game_with_grouped_agents.py +++ b/rllib/examples/multi_agent/two_step_game_with_grouped_agents.py @@ -40,11 +40,7 @@ +------------------+-------+-------------------+-------------+ """ -from ray.rllib.connectors.env_to_module import ( - AddObservationsFromEpisodesToBatch, - FlattenObservations, - WriteObservationsToEpisodes, -) +from ray.rllib.connectors.env_to_module import FlattenObservations from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.examples.envs.classes.two_step_game import TwoStepGameWithGroupedAgents @@ -76,11 +72,7 @@ 
.get_default_config() .environment("grouped_twostep") .env_runners( - env_to_module_connector=lambda env: ( - AddObservationsFromEpisodesToBatch(), - FlattenObservations(multi_agent=True), - WriteObservationsToEpisodes(), - ), + env_to_module_connector=lambda env: FlattenObservations(multi_agent=True), ) .multi_agent( policies={"p0"},