[RLlib] Cleanup examples folder #16: Add missing docstrings to 2 connector example scripts. (ray-project#45864)
sven1977 authored Jun 13, 2024
1 parent 384f46c commit f49557f
Showing 23 changed files with 617 additions and 404 deletions.
52 changes: 27 additions & 25 deletions rllib/BUILD
@@ -2086,6 +2086,27 @@ py_test(
 # tagged by @OldAPIStack and/or @HybridAPIStack
 # ----------------------

+# subdirectory: actions/
+
+# Nested action spaces (flattening obs and learning w/ multi-action distribution).
+py_test(
+    name = "examples/actions/nested_action_spaces_ppo",
+    main = "examples/actions/nested_action_spaces.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "large",
+    srcs = ["examples/actions/nested_action_spaces.py"],
+    args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"]
+)
+
+py_test(
+    name = "examples/actions/nested_action_spaces_multi_agent_ppo",
+    main = "examples/actions/nested_action_spaces.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "large",
+    srcs = ["examples/actions/nested_action_spaces.py"],
+    args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"]
+)
+
 # subdirectory: algorithms/

 #@OldAPIStack
@@ -2213,41 +2234,22 @@ py_test(
     args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iter=2", "--framework=torch", "--algo=PPO", "--num-env-runners=4", "--num-cpus=6"]
 )

-# Nested action spaces (flattening obs and learning w/ multi-action distribution).
-py_test(
-    name = "examples/connectors/nested_action_spaces_ppo",
-    main = "examples/connectors/nested_action_spaces.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "large",
-    srcs = ["examples/connectors/nested_action_spaces.py"],
-    args = ["--enable-new-api-stack", "--as-test", "--framework=torch", "--stop-reward=-500.0", "--algo=PPO"]
-)
-
-py_test(
-    name = "examples/connectors/nested_action_spaces_multi_agent_ppo",
-    main = "examples/connectors/nested_action_spaces.py",
-    tags = ["team:rllib", "exclusive", "examples"],
-    size = "large",
-    srcs = ["examples/connectors/nested_action_spaces.py"],
-    args = ["--enable-new-api-stack", "--as-test", "--num-agents=2", "--framework=torch", "--stop-reward=-1000.0", "--algo=PPO"]
-)
-
 # Nested observation spaces (flattening).
 py_test(
-    name = "examples/connectors/nested_observation_spaces_ppo",
-    main = "examples/connectors/nested_observation_spaces.py",
+    name = "examples/connectors/flatten_observations_dict_space_ppo",
+    main = "examples/connectors/flatten_observations_dict_space.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
-    srcs = ["examples/connectors/nested_observation_spaces.py"],
+    srcs = ["examples/connectors/flatten_observations_dict_space.py"],
     args = ["--enable-new-api-stack", "--as-test", "--stop-reward=400.0", "--framework=torch", "--algo=PPO"]
 )

 py_test(
-    name = "examples/connectors/nested_observation_spaces_multi_agent_ppo",
-    main = "examples/connectors/nested_observation_spaces.py",
+    name = "examples/connectors/flatten_observations_dict_space_multi_agent_ppo",
+    main = "examples/connectors/flatten_observations_dict_space.py",
     tags = ["team:rllib", "exclusive", "examples"],
     size = "medium",
-    srcs = ["examples/connectors/nested_observation_spaces.py"],
+    srcs = ["examples/connectors/flatten_observations_dict_space.py"],
    args = ["--enable-new-api-stack", "--num-agents=2", "--as-test", "--stop-reward=800.0", "--framework=torch", "--algo=PPO"]
 )

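The `args` lists above are plain CLI flags for the example scripts. A minimal sketch of how such a script consumes them, assuming the shared `add_rllib_example_script_args` helper from `ray.rllib.utils.test_utils` that the rllib/examples scripts use (the `default_reward` value here simply mirrors the single-agent target's `--stop-reward` and is illustrative only):

    from ray.rllib.utils.test_utils import add_rllib_example_script_args

    # Build the common example-script parser (adds flags such as
    # --enable-new-api-stack, --as-test, --stop-reward, --algo, --num-agents).
    parser = add_rllib_example_script_args(default_reward=-500.0)
    args = parser.parse_args()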
2 changes: 1 addition & 1 deletion rllib/connectors/connector_pipeline_v2.py
@@ -312,7 +312,7 @@ def _fix_spaces(self):
         obs_space = self.input_observation_space
         act_space = self.input_action_space
         for con in self.connectors:
-            con.input_observation_space = obs_space
             con.input_action_space = act_space
+            con.input_observation_space = obs_space
             obs_space = con.observation_space
             act_space = con.action_space
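The swap above (mirrored in `ConnectorV2.__init__` in the next file) matters because assigning `input_observation_space` can trigger a recompute of the connector's output observation space, and that recompute may read the input action space. A toy, self-contained sketch of the ordering dependency, using stand-in classes rather than the actual RLlib ones:

    import gymnasium as gym

    class ToyConnector:
        """Stand-in for a piece whose output obs space depends on the input
        ACTION space (e.g. one that appends a one-hot previous action)."""

        def __init__(self):
            self.input_action_space = None

        @property
        def input_observation_space(self):
            return self._input_observation_space

        @input_observation_space.setter
        def input_observation_space(self, space):
            self._input_observation_space = space
            # Recomputing the output space reads the input action space, so
            # that one must already be set; wrong order -> AttributeError.
            num_actions = self.input_action_space.n
            self.observation_space = gym.spaces.Box(
                -1.0, 1.0, (space.shape[0] + num_actions,), "float32"
            )

    con = ToyConnector()
    # Correct order, as in the fixed `_fix_spaces`: action space first.
    con.input_action_space = gym.spaces.Discrete(2)
    con.input_observation_space = gym.spaces.Box(-1.0, 1.0, (4,), "float32")
    assert con.observation_space.shape == (6,)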
3 changes: 2 additions & 1 deletion rllib/connectors/connector_v2.py
@@ -84,8 +84,9 @@ def __init__(
         self._action_space = None
         self._input_observation_space = None
         self._input_action_space = None
-        self.input_observation_space = input_observation_space
+
         self.input_action_space = input_action_space
+        self.input_observation_space = input_observation_space

     @OverrideToImplementCustomLogic
     def recompute_observation_space_from_input_spaces(self) -> gym.Space:
4 changes: 2 additions & 2 deletions rllib/connectors/env_to_module/__init__.py
@@ -14,7 +14,7 @@
     FlattenObservations,
 )
 from ray.rllib.connectors.env_to_module.prev_actions_prev_rewards import (
-    PrevActionsPrevRewardsConnector,
+    PrevActionsPrevRewards,
 )
 from ray.rllib.connectors.env_to_module.write_observations_to_episodes import (
     WriteObservationsToEpisodes,
@@ -29,6 +29,6 @@
     "EnvToModulePipeline",
     "FlattenObservations",
     "NumpyToTensor",
-    "PrevActionsPrevRewardsConnector",
+    "PrevActionsPrevRewards",
     "WriteObservationsToEpisodes",
 ]
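User code now imports the shorter name. A minimal before/after sketch; the constructor keyword names in the last line are an assumption for illustration, not a verified signature:

    # Old name (pre-commit):
    # from ray.rllib.connectors.env_to_module import PrevActionsPrevRewardsConnector

    # New name:
    from ray.rllib.connectors.env_to_module import PrevActionsPrevRewards

    # Hypothetical instantiation (kwarg names assumed):
    connector = PrevActionsPrevRewards(n_prev_actions=1, n_prev_rewards=1)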
114 changes: 56 additions & 58 deletions rllib/connectors/env_to_module/flatten_observations.py
@@ -6,7 +6,6 @@
 import tree  # pip install dm_tree

 from ray.rllib.connectors.connector_v2 import ConnectorV2
-from ray.rllib.core.columns import Columns
 from ray.rllib.core.rl_module.rl_module import RLModule
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.numpy import flatten_inputs_to_1d_tensor
@@ -19,25 +18,20 @@
 class FlattenObservations(ConnectorV2):
     """A connector piece that flattens all observation components into a 1D array.

-    - Only works on data that has already been added to the batch.
-    - This connector makes the assumption that under the Columns.OBS key in batch,
-    there is either a list of individual env observations to be flattened (single-agent
-    case) or a dict mapping agent- and module IDs to lists of data items to be
-    flattened (multi-agent case).
-    - Does NOT work in a Learner pipeline as it operates on individual observation
-    items (as opposed to batched/time-ranked data).
-    - Therefore, assumes that the altered (flattened) observations will be written
-    back into the episode by a later connector piece in the env-to-module pipeline
-    (which this piece is part of as well).
-    - Does NOT read any information from the given list of Episode objects.
-    - Does NOT write any observations (or other data) to the given Episode objects.
+    - Works directly on the incoming episodes list and changes the last observation
+    in-place (write the flattened observation back into the episode).
+    - This connector does NOT alter the incoming batch (`data`) when called.
+    - This connector does NOT work in a `LearnerConnectorPipeline` because it requires
+    the incoming episodes to still be ongoing (in progress) as it only alters the
+    latest observation, not all observations in an episode.

     .. testcode::

         import gymnasium as gym
         import numpy as np
         from ray.rllib.connectors.env_to_module import FlattenObservations
+        from ray.rllib.env.single_agent_episode import SingleAgentEpisode
         from ray.rllib.utils.test_utils import check

         # Some arbitrarily nested, complex observation space.
@@ -51,48 +45,50 @@ class FlattenObservations(ConnectorV2):
         })
         act_space = gym.spaces.Discrete(2)

-        # A batch of two example items, both coming from the above defined observation
-        # space.
-        batch = {
-            "obs": [
-                # 1st example item.
+        # Two example episodes, both with initial (reset) observations coming from the
+        # above defined observation space.
+        episode_1 = SingleAgentEpisode(
+            observations=[
                 {
                     "a": np.array(-10.0, np.float32),
                     "b": (1, np.array([[-1.0], [-1.0]], np.float32)),
                     "c": np.array([0, 2]),
                 },
-                # 2nd example item.
+            ],
+        )
+        episode_2 = SingleAgentEpisode(
+            observations=[
                 {
                     "a": np.array(10.0, np.float32),
                     "b": (0, np.array([[1.0], [1.0]], np.float32)),
                     "c": np.array([1, 1]),
                 },
             ],
-        }
+        )

         # Construct our connector piece.
         connector = FlattenObservations(obs_space, act_space)

         # Call our connector piece with the example data.
         output_data = connector(
             rl_module=None,  # This connector works without an RLModule.
-            data=batch,
-            episodes=[],  # This connector does not need the `episodes` input.
+            data={},  # This connector does not alter any data.
+            episodes=[episode_1, episode_2],
             explore=True,
             shared_data={},
         )

-        # The connector does not change the number of items in the data (still 2 items).
-        check(len(output_data["obs"]), 2)
+        # The connector does not alter the data and acts as pure pass-through.
+        check(output_data, {})

-        # The connector has flattened each item in the data to a 1D tensor.
+        # The connector has flattened each item in the episodes to a 1D tensor.
         check(
-            output_data["obs"][0],
+            episode_1.get_observations(0),
             # box() disc(2). box(2, 1). multidisc(2, 3)........
             np.array([-10.0, 0.0, 1.0, -1.0, -1.0, 1.0, 0.0, 0.0, 0.0, 1.0]),
         )
         check(
-            output_data["obs"][1],
+            episode_2.get_observations(0),
             # box() disc(2). box(2, 1). multidisc(2, 3)........
             np.array([10.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]),
         )
@@ -169,40 +165,42 @@ def __call__(
         shared_data: Optional[dict] = None,
         **kwargs,
     ) -> Any:
-        observations = data.get(Columns.OBS)
-
-        if observations is None:
-            raise ValueError(
-                f"`batch` must already have a column named {Columns.OBS} in it "
-                f"for this connector to work!"
-            )
-
-        # Process each item under the Columns.OBS key individually and flatten
-        # it. We are using the `ConnectorV2.foreach_batch_item_change_in_place` API,
-        # allowing us to not worry about multi- or single-agent setups and returning
-        # the new version of each item we are iterating over.
-        self.foreach_batch_item_change_in_place(
-            batch=data,
-            column=Columns.OBS,
-            func=(
-                lambda item, eps_id, agent_id, module_id: (
-                    # Multi-agent AND skip this AgentID.
-                    item
-                    if self._agent_ids and agent_id not in self._agent_ids
-                    # Single-agent or flatten this AgentIDs observation.
-                    else flatten_inputs_to_1d_tensor(
-                        item,
+        for sa_episode in self.single_agent_episode_iterator(
+            episodes, agents_that_stepped_only=True
+        ):
+            # Episode is not finalized yet and thus still operates on lists of items.
+            assert not sa_episode.is_finalized
+
+            last_obs = sa_episode.get_observations(-1)
+
+            if self._multi_agent:
+                if (
+                    self._agent_ids is not None
+                    and sa_episode.agent_id not in self._agent_ids
+                ):
+                    flattened_obs = last_obs
+                else:
+                    flattened_obs = flatten_inputs_to_1d_tensor(
+                        inputs=last_obs,
                         # In the multi-agent case, we need to use the specific agent's
                         # space struct, not the multi-agent observation space dict.
-                        (
-                            self._input_obs_base_struct
-                            if not agent_id
-                            else self._input_obs_base_struct[agent_id]
-                        ),
-                        # Our items are bare observations (no batch axis present).
+                        spaces_struct=self._input_obs_base_struct[sa_episode.agent_id],
+                        # Our items are individual observations (no batch axis present).
                         batch_axis=False,
                     )
+            else:
+                flattened_obs = flatten_inputs_to_1d_tensor(
+                    inputs=last_obs,
+                    spaces_struct=self._input_obs_base_struct,
+                    # Our items are individual observations (no batch axis present).
+                    batch_axis=False,
                 )
-            ),
-        )

+            # Write new observation directly back into the episode.
+            sa_episode.set_observations(at_indices=-1, new_data=flattened_obs)
+            # We set the Episode's observation space to ours so that we can safely
+            # set the last obs to the new value (without causing a space mismatch
+            # error).
+            sa_episode.observation_space = self.observation_space
+
         return data
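For context, this piece is normally prepended to the env-to-module pipeline through the algorithm config, which is what the flatten_observations_dict_space.py example targeted by the BUILD changes above does. A minimal sketch, assuming the new API stack is enabled (the BUILD targets pass --enable-new-api-stack for exactly this) and using a placeholder env ID for any env with a nested Dict/Tuple observation space:

    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.connectors.env_to_module import FlattenObservations

    config = (
        PPOConfig()
        .environment("MyNestedObsEnv")  # placeholder: any env w/ nested obs space
        .env_runners(
            # Prepend the flattening piece to the env-to-module pipeline.
            env_to_module_connector=lambda env: FlattenObservations(),
        )
    )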
13 changes: 5 additions & 8 deletions rllib/connectors/env_to_module/mean_std_filter.py
@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional
-from gymnasium.spaces import Discrete, MultiDiscrete

 import gymnasium as gym
+from gymnasium.spaces import Discrete, MultiDiscrete
 import numpy as np
 import tree

@@ -121,13 +121,10 @@ def __call__(
                 sa_obs, update=self._update_stats
             )
             sa_episode.set_observations(at_indices=-1, new_data=normalized_sa_obs)
-
-            if len(sa_episode) == 0:
-                # TODO (sven): This is kind of a hack.
-                # We set the Episode's observation space to ours so that we can safely
-                # set the last obs to the new value (without causing a space mismatch
-                # error).
-                sa_episode.observation_space = self.observation_space
+            # We set the Episode's observation space to ours so that we can safely
+            # set the last obs to the new value (without causing a space mismatch
+            # error).
+            sa_episode.observation_space = self.observation_space

         # Leave `data` as is. RLlib's default connector will automatically
         # populate the OBS column therein from the episodes' now transformed