diff --git a/rllib/connectors/common/add_observations_from_episodes_to_batch.py b/rllib/connectors/common/add_observations_from_episodes_to_batch.py index 9f5aad6f1c47..7d55251a841e 100644 --- a/rllib/connectors/common/add_observations_from_episodes_to_batch.py +++ b/rllib/connectors/common/add_observations_from_episodes_to_batch.py @@ -7,11 +7,40 @@ from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AddObservationsFromEpisodesToBatch(ConnectorV2): """Gets the last observation from a running episode and adds it to the batch. + Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that + are added automatically by RLlib into every env-to-module/Learner connector + pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or + `config.add_default_connectors_to_learner_pipeline` are set to + False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + + This ConnectorV2: - Operates on a list of Episode objects. - Gets the most recent observation(s) from all the given episodes and adds them to the batch under construction (as a list of individual observations).
diff --git a/rllib/connectors/common/add_states_from_episodes_to_batch.py b/rllib/connectors/common/add_states_from_episodes_to_batch.py index 9f6e602387ae..4b15229093b6 100644 --- a/rllib/connectors/common/add_states_from_episodes_to_batch.py +++ b/rllib/connectors/common/add_states_from_episodes_to_batch.py @@ -15,11 +15,39 @@ from ray.rllib.utils.numpy import convert_to_numpy from ray.rllib.utils.spaces.space_utils import batch, BatchedNdArray from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AddStatesFromEpisodesToBatch(ConnectorV2): """Gets last STATE_OUT from running episode and adds it as STATE_IN to the batch. + Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that + are added automatically by RLlib into every env-to-module/Learner connector + pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or + `config.add_default_connectors_to_learner_pipeline` are set to + False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + If the RLModule is stateful, the episodes' STATE_OUTS will be extracted and restructured under a new STATE_IN key. As a Learner connector, the resulting STATE_IN batch has the shape (B', ...). @@ -32,6 +60,7 @@ class AddStatesFromEpisodesToBatch(ConnectorV2): reshaped into (B, T=max_seq_len (learner) or 1 (env-to-module), ...) and will be zero-padded, if necessary.
+ This ConnectorV2: - Operates on a list of Episode objects. - Gets the most recent STATE_OUT from all the given episodes and adds them under the STATE_IN key to the batch under construction. diff --git a/rllib/connectors/common/agent_to_module_mapping.py b/rllib/connectors/common/agent_to_module_mapping.py index ee6a738ee41e..1f8a140caf9f 100644 --- a/rllib/connectors/common/agent_to_module_mapping.py +++ b/rllib/connectors/common/agent_to_module_mapping.py @@ -8,11 +8,39 @@ from ray.rllib.env.multi_agent_episode import MultiAgentEpisode from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType, ModuleID +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AgentToModuleMapping(ConnectorV2): """ConnectorV2 that performs mapping of data from AgentID based to ModuleID based. + Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that + are added automatically by RLlib into every env-to-module/Learner connector + pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or + `config.add_default_connectors_to_learner_pipeline` are set to + False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + This connector piece is only used by RLlib (as a default connector piece) in a multi-agent setup.
diff --git a/rllib/connectors/common/batch_individual_items.py b/rllib/connectors/common/batch_individual_items.py index 02b72c870b2a..f7c5c49eb5ce 100644 --- a/rllib/connectors/common/batch_individual_items.py +++ b/rllib/connectors/common/batch_individual_items.py @@ -10,9 +10,93 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.spaces.space_utils import batch from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class BatchIndividualItems(ConnectorV2): + """Batches individual data-items (in lists) into tensors (with batch dimension). + + Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that + are added automatically by RLlib into every env-to-module/Learner connector + pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or + `config.add_default_connectors_to_learner_pipeline` are set to + False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + + This ConnectorV2: + - Operates only on the input `data`, NOT the incoming list of episode objects + (ignored).
+ - In the single-agent case, `data` must already be a dict, structured as follows by + prior connector pieces of the same pipeline: + [col0] -> {[(eps_id,)]: [list of individual batch items]} + - In the multi-agent case, `data` must already be a dict, structured as follows by + prior connector pieces of the same pipeline (in particular the + `AgentToModuleMapping` piece): + [module_id] -> [col0] -> [list of individual batch items] + - Translates the above data under the different columns (e.g. "obs") into final + (batched) structures. For the single-agent case, the output `data` looks like this: + [col0] -> [possibly complex struct of batches (at the leafs)]. + For the multi-agent case, the output `data` looks like this: + [module_id] -> [col0] -> [possibly complex struct of batches (at the leafs)]. + + .. testcode:: + + from ray.rllib.connectors.common import BatchIndividualItems + from ray.rllib.utils.test_utils import check + + single_agent_data = { + "obs": { + # Note that at this stage, next-obs is not part of the data anymore .. + ("MA-EPS0",): [0, 1], + ("MA-EPS1",): [2, 3], + }, + "actions": { + # .. so we have as many actions per episode as we have observations. + ("MA-EPS0",): [4, 5], + ("MA-EPS1",): [6, 7], + }, + } + + # Create our (single-agent) connector piece. + connector = BatchIndividualItems() + + # Call the connector (and thereby batch the individual items). + output_data = connector( + rl_module=None, # This particular connector works without an RLModule. + data=single_agent_data, + episodes=[], # This particular connector works without a list of episodes. + explore=True, + shared_data={}, + ) + + # `data` should now be batched (episode IDs should have been removed from the + # struct). + check( + output_data, + {"obs": [0, 1, 2, 3], "actions": [4, 5, 6, 7]}, + ) + """ + def __init__( self, input_observation_space: Optional[gym.Space] = None, @@ -52,7 +136,7 @@ def __call__( # Multi-agent case: This connector piece should only be used after(!) 
# the AgentToModuleMapping connector has already been applied, leading # to a batch structure of: - # [module_id] -> [col0] -> [list of items] + # [module_id] -> [col0] -> [list of individual batch items] if is_multi_rl_module and column in rl_module: # Case, in which a column has already been properly batched before this # connector piece is called. diff --git a/rllib/connectors/common/module_to_agent_unmapping.py b/rllib/connectors/common/module_to_agent_unmapping.py index c17854db1ede..399d80d09914 100644 --- a/rllib/connectors/common/module_to_agent_unmapping.py +++ b/rllib/connectors/common/module_to_agent_unmapping.py @@ -6,8 +6,10 @@ from ray.rllib.env.multi_agent_episode import MultiAgentEpisode from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class ModuleToAgentUnmapping(ConnectorV2): """Performs flipping of `data` from ModuleID- to AgentID based mapping. diff --git a/rllib/connectors/common/numpy_to_tensor.py b/rllib/connectors/common/numpy_to_tensor.py index 78cb9a02ad39..d6aa689e38a1 100644 --- a/rllib/connectors/common/numpy_to_tensor.py +++ b/rllib/connectors/common/numpy_to_tensor.py @@ -10,13 +10,45 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.torch_utils import convert_to_torch_tensor from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class NumpyToTensor(ConnectorV2): """Converts numpy arrays across the entire input data into (framework) tensors. The framework information is received via the provided `rl_module` arg in the - `__call__`. + `__call__()` method. 
+ + Note: This is one of the default env-to-module or Learner ConnectorV2 pieces that + are added automatically by RLlib into every env-to-module/Learner connector + pipeline, unless `config.add_default_connectors_to_env_to_module_pipeline` or + `config.add_default_connectors_to_learner_pipeline` are set to + False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + + This ConnectorV2: + - Loops through the input `data` and converts all found numpy arrays into + framework-specific tensors (possibly on a GPU). """ def __init__( @@ -37,6 +69,8 @@ def __init__( pin_mempory: Whether to pin memory when creating (torch) tensors. If None (default), pins memory if `as_learner_connector` is True, otherwise doesn't pin memory. + device: An optional device to move the resulting tensors to. If not + provided, all data will be left on the CPU. **kwargs: """ super().__init__( @@ -68,7 +102,6 @@ def __call__( is_single_agent = True data = {DEFAULT_MODULE_ID: data} - # TODO (sven): Support specifying a device (e.g. GPU).
for module_id, module_data in data.copy().items(): infos = module_data.pop(Columns.INFOS, None) if rl_module.framework == "torch": diff --git a/rllib/connectors/common/tensor_to_numpy.py b/rllib/connectors/common/tensor_to_numpy.py index d3c6f9b390b2..f2def9f8566c 100644 --- a/rllib/connectors/common/tensor_to_numpy.py +++ b/rllib/connectors/common/tensor_to_numpy.py @@ -5,8 +5,10 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.numpy import convert_to_numpy from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class TensorToNumpy(ConnectorV2): """Converts (framework) tensors across the entire input data into numpy arrays.""" diff --git a/rllib/connectors/env_to_module/mean_std_filter.py b/rllib/connectors/env_to_module/mean_std_filter.py index ad583ed530d3..d568a7bc36a4 100644 --- a/rllib/connectors/env_to_module/mean_std_filter.py +++ b/rllib/connectors/env_to_module/mean_std_filter.py @@ -8,11 +8,10 @@ from ray.rllib.connectors.connector_v2 import ConnectorV2 from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override -from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter +from ray.rllib.utils.filter import MeanStdFilter as _MeanStdFilter, RunningStat from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.typing import AgentID, EpisodeType, StateDict from ray.util.annotations import PublicAPI -from ray.rllib.utils.filter import RunningStat @PublicAPI(stability="alpha") diff --git a/rllib/connectors/env_to_module/observation_preprocessor.py b/rllib/connectors/env_to_module/observation_preprocessor.py new file mode 100644 index 000000000000..9278df42bd45 --- /dev/null +++ b/rllib/connectors/env_to_module/observation_preprocessor.py @@ -0,0 +1,72 @@ +import abc +from typing import Any, List, Optional + +from ray.rllib.connectors.connector_v2 import ConnectorV2 +from 
ray.rllib.core.rl_module.rl_module import RLModule +from ray.rllib.utils.annotations import override +from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI + + +@PublicAPI(stability="alpha") +class ObservationPreprocessor(ConnectorV2, abc.ABC): + """Env-to-module connector performing one preprocessor step on the last observation. + + This is a convenience class that simplifies the writing of few-step preprocessor + connectors. + + Users must implement the `preprocess()` method, which simplifies the usual procedure + of extracting some data from a list of episodes and adding it to the batch to a mere + "old-observation --transform--> return new-observation" step. + """ + + @override(ConnectorV2) + def recompute_observation_space_from_input_spaces(self): + # Users should override this method only in case the `ObservationPreprocessor` + # changes the observation space of the pipeline. In this case, return the new + # observation space based on the incoming one (`self.input_observation_space`). + super().recompute_observation_space_from_input_spaces() + + @abc.abstractmethod + def preprocess(self, observation): + """Override to implement the preprocessing logic. + + Args: + observation: A single (non-batched) observation item for a single agent to + be processed by this connector. + + Returns: + The new observation after `observation` has been preprocessed. + """ + + @override(ConnectorV2) + def __call__( + self, + *, + rl_module: RLModule, + data: Any, + episodes: List[EpisodeType], + explore: Optional[bool] = None, + persistent_data: Optional[dict] = None, + **kwargs, + ) -> Any: + # We process and then replace observations inside the episodes directly. + # Thus, all following connectors will only see and operate on the already + # processed observation (w/o having access anymore to the original + # observations). 
+ for sa_episode in self.single_agent_episode_iterator(episodes): + observation = sa_episode.get_observations(-1) + + # Process the observation and write the new observation back into the + # episode. + new_observation = self.preprocess(observation=observation) + sa_episode.set_observations(at_indices=-1, new_data=new_observation) + # We set the Episode's observation space to ours so that we can safely + # set the last obs to the new value (without causing a space mismatch + # error). + sa_episode.observation_space = self.observation_space + + # Leave `data` as is. RLlib's default connector will automatically + # populate the OBS column therein from the episodes' now transformed + # observations. + return data diff --git a/rllib/connectors/env_to_module/prev_actions_prev_rewards.py b/rllib/connectors/env_to_module/prev_actions_prev_rewards.py index 5b26cd1f8b87..2ba089c5f91c 100644 --- a/rllib/connectors/env_to_module/prev_actions_prev_rewards.py +++ b/rllib/connectors/env_to_module/prev_actions_prev_rewards.py @@ -9,8 +9,10 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.spaces.space_utils import batch, flatten_to_single_ndarray from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class PrevActionsPrevRewards(ConnectorV2): """A connector piece that adds previous rewards and actions to the input obs. diff --git a/rllib/connectors/env_to_module/write_observations_to_episodes.py b/rllib/connectors/env_to_module/write_observations_to_episodes.py index f59fcf356151..7e460014e318 100644 --- a/rllib/connectors/env_to_module/write_observations_to_episodes.py +++ b/rllib/connectors/env_to_module/write_observations_to_episodes.py @@ -12,6 +12,21 @@ class WriteObservationsToEpisodes(ConnectorV2): """Writes the observations from the batch into the running episodes. 
+ Note: This is one of the default env-to-module ConnectorV2 pieces that are added + automatically by RLlib into every env-to-module connector pipeline, unless + `config.add_default_connectors_to_env_to_module_pipeline` is set to False. + + The default env-to-module connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups! + BatchIndividualItems, + NumpyToTensor, + ] + + This ConnectorV2: - Operates on a batch that already has observations in it and a list of Episode objects. - Writes the observation(s) from the batch to all the given episodes. Thereby diff --git a/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py b/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py index 5c454ddcb647..1460bc8ae36f 100644 --- a/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py +++ b/rllib/connectors/learner/add_columns_from_episodes_to_train_batch.py @@ -5,13 +5,30 @@ from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AddColumnsFromEpisodesToTrainBatch(ConnectorV2): """Adds infos/actions/rewards/terminateds/... to train batch. + Note: This is one of the default Learner ConnectorV2 pieces that are added + automatically by RLlib into every Learner connector pipeline, unless + `config.add_default_connectors_to_learner_pipeline` is set to False. + + The default Learner connector pipeline is: + [ + [0 or more user defined ConnectorV2 pieces], + AddObservationsFromEpisodesToBatch, + AddColumnsFromEpisodesToTrainBatch, + AddStatesFromEpisodesToBatch, + AgentToModuleMapping, # only in multi-agent setups!
+ BatchIndividualItems, + NumpyToTensor, + ] + Does NOT add observations to train batch (these should have already been added - by a different ConnectorV2 piece: AddObservationsToTrainBatch) + by another piece, `AddObservationsFromEpisodesToBatch`, in the same pipeline). If provided with `episodes` data, this connector piece makes sure that the final train batch going into the RLModule for updating (`forward_train()` call) contains diff --git a/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py b/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py index 4812ca43c524..fff6d57d714c 100644 --- a/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py +++ b/rllib/connectors/learner/add_next_observations_from_episodes_to_train_batch.py @@ -7,8 +7,10 @@ from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AddNextObservationsFromEpisodesToTrainBatch(ConnectorV2): """Adds the NEXT_OBS column with the correct episode observations to train batch. diff --git a/rllib/connectors/learner/add_one_ts_to_episodes_and_truncate.py b/rllib/connectors/learner/add_one_ts_to_episodes_and_truncate.py index 9e1cd68a88a3..0a408ed9de8c 100644 --- a/rllib/connectors/learner/add_one_ts_to_episodes_and_truncate.py +++ b/rllib/connectors/learner/add_one_ts_to_episodes_and_truncate.py @@ -6,8 +6,10 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.postprocessing.episodes import add_one_ts_to_episodes_and_truncate from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class AddOneTsToEpisodesAndTruncate(ConnectorV2): """Adds an artificial timestep to all incoming episodes at the end.
diff --git a/rllib/connectors/module_to_env/get_actions.py b/rllib/connectors/module_to_env/get_actions.py index cbade4e6c423..a54a230357c4 100644 --- a/rllib/connectors/module_to_env/get_actions.py +++ b/rllib/connectors/module_to_env/get_actions.py @@ -6,11 +6,32 @@ from ray.rllib.env.multi_agent_episode import MultiAgentEpisode from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class GetActions(ConnectorV2): """Connector piece sampling actions from ACTION_DIST_INPUTS from an RLModule. + Note: This is one of the default module-to-env ConnectorV2 pieces that + are added automatically by RLlib into every module-to-env connector pipeline, + unless `config.add_default_connectors_to_module_to_env_pipeline` is set to + False. + + The default module-to-env connector pipeline is: + [ + GetActions, + TensorToNumpy, + UnBatchToIndividualItems, + ModuleToAgentUnmapping, # only in multi-agent setups! + RemoveSingleTsTimeRankFromBatch, + + [0 or more user defined ConnectorV2 pieces], + + NormalizeAndClipActions, + ListifyDataForVectorEnv, + ] + If necessary, this connector samples actions, given action dist. inputs and a dist. class. 
The connector will only sample from the action distribution, if the diff --git a/rllib/connectors/module_to_env/listify_data_for_vector_env.py b/rllib/connectors/module_to_env/listify_data_for_vector_env.py index cf042a0f556f..4c9d9e9ce807 100644 --- a/rllib/connectors/module_to_env/listify_data_for_vector_env.py +++ b/rllib/connectors/module_to_env/listify_data_for_vector_env.py @@ -7,11 +7,32 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.spaces.space_utils import batch from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class ListifyDataForVectorEnv(ConnectorV2): """Performs conversion from ConnectorV2-style format to env/episode insertion. + Note: This is one of the default module-to-env ConnectorV2 pieces that + are added automatically by RLlib into every module-to-env connector pipeline, + unless `config.add_default_connectors_to_module_to_env_pipeline` is set to + False. + + The default module-to-env connector pipeline is: + [ + GetActions, + TensorToNumpy, + UnBatchToIndividualItems, + ModuleToAgentUnmapping, # only in multi-agent setups! + RemoveSingleTsTimeRankFromBatch, + + [0 or more user defined ConnectorV2 pieces], + + NormalizeAndClipActions, + ListifyDataForVectorEnv, + ] + Single agent case: Convert from: [col] -> [(env_vector_idx,)] -> [list of items]. diff --git a/rllib/connectors/module_to_env/normalize_and_clip_actions.py b/rllib/connectors/module_to_env/normalize_and_clip_actions.py index 8a95fa725c4f..16168ffc0c40 100644 --- a/rllib/connectors/module_to_env/normalize_and_clip_actions.py +++ b/rllib/connectors/module_to_env/normalize_and_clip_actions.py @@ -13,9 +13,50 @@ unsquash_action, ) from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class NormalizeAndClipActions(ConnectorV2): + """Normalizes or clips actions in the input data (coming from the RLModule). 
+ + Note: This is one of the default module-to-env ConnectorV2 pieces that + are added automatically by RLlib into every module-to-env connector pipeline, + unless `config.add_default_connectors_to_module_to_env_pipeline` is set to + False. + + The default module-to-env connector pipeline is: + [ + GetActions, + TensorToNumpy, + UnBatchToIndividualItems, + ModuleToAgentUnmapping, # only in multi-agent setups! + RemoveSingleTsTimeRankFromBatch, + + [0 or more user defined ConnectorV2 pieces], + + NormalizeAndClipActions, + ListifyDataForVectorEnv, + ] + + This ConnectorV2: + - Deep copies the Columns.ACTIONS in the incoming `data` into a new column: + Columns.ACTIONS_FOR_ENV. + - Loops through the Columns.ACTIONS in the incoming `data` and normalizes or clips + these depending on the c'tor settings in `config.normalize_actions` and + `config.clip_actions`. + - Only applies to envs with Box action spaces. + + Normalizing is the process of mapping NN-outputs (which are usually small + numbers, e.g. between -1.0 and 1.0) to the bounds defined by the action-space. + Normalizing helps the NN to learn faster in environments with large ranges between + `low` and `high` bounds or skewed action bounds (e.g. Box(-3000.0, 1.0, ...)). + + Clipping clips the actions computed by the NN (and sampled from a distribution) + between the bounds defined by the action-space. Note that clipping is only performed + if `normalize_actions` is False. + """ + @override(ConnectorV2) def recompute_action_space_from_input_spaces(self) -> gym.Space: self._action_space_struct = get_base_struct_from_space(self.input_action_space) @@ -52,9 +93,10 @@ def __init__( from the resulting distribution, then this 0.9 will be clipped to 0.5 to fit into the [-0.5 0.5] interval. 
""" + self._action_space_struct = None + super().__init__(input_observation_space, input_action_space, **kwargs) - self._action_space_struct = None self.normalize_actions = normalize_actions self.clip_actions = clip_actions diff --git a/rllib/connectors/module_to_env/remove_single_ts_time_rank_from_batch.py b/rllib/connectors/module_to_env/remove_single_ts_time_rank_from_batch.py index 16de99578afd..15aebf266053 100644 --- a/rllib/connectors/module_to_env/remove_single_ts_time_rank_from_batch.py +++ b/rllib/connectors/module_to_env/remove_single_ts_time_rank_from_batch.py @@ -8,9 +8,33 @@ from ray.rllib.core.rl_module.rl_module import RLModule from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class RemoveSingleTsTimeRankFromBatch(ConnectorV2): + """ + Note: This is one of the default module-to-env ConnectorV2 pieces that + are added automatically by RLlib into every module-to-env connector pipeline, + unless `config.add_default_connectors_to_module_to_env_pipeline` is set to + False. + + The default module-to-env connector pipeline is: + [ + GetActions, + TensorToNumpy, + UnBatchToIndividualItems, + ModuleToAgentUnmapping, # only in multi-agent setups! 
+ RemoveSingleTsTimeRankFromBatch, + + [0 or more user defined ConnectorV2 pieces], + + NormalizeAndClipActions, + ListifyDataForVectorEnv, + ] + + """ + @override(ConnectorV2) def __call__( self, diff --git a/rllib/connectors/module_to_env/unbatch_to_individual_items.py b/rllib/connectors/module_to_env/unbatch_to_individual_items.py index c992380adc29..5c271c5d9c78 100644 --- a/rllib/connectors/module_to_env/unbatch_to_individual_items.py +++ b/rllib/connectors/module_to_env/unbatch_to_individual_items.py @@ -8,10 +8,32 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.spaces.space_utils import unbatch from ray.rllib.utils.typing import EpisodeType +from ray.util.annotations import PublicAPI +@PublicAPI(stability="alpha") class UnBatchToIndividualItems(ConnectorV2): - """Unbatches the given `data` back into the individual-batch-items format.""" + """Unbatches the given `data` back into the individual-batch-items format. + + Note: This is one of the default module-to-env ConnectorV2 pieces that + are added automatically by RLlib into every module-to-env connector pipeline, + unless `config.add_default_connectors_to_module_to_env_pipeline` is set to + False. + + The default module-to-env connector pipeline is: + [ + GetActions, + TensorToNumpy, + UnBatchToIndividualItems, + ModuleToAgentUnmapping, # only in multi-agent setups! + RemoveSingleTsTimeRankFromBatch, + + [0 or more user defined ConnectorV2 pieces], + + NormalizeAndClipActions, + ListifyDataForVectorEnv, + ] + """ @override(ConnectorV2) def __call__(