diff --git a/docs/source/features/hydra.rst b/docs/source/features/hydra.rst index 2fa5d2d706..89673dbe35 100644 --- a/docs/source/features/hydra.rst +++ b/docs/source/features/hydra.rst @@ -115,7 +115,7 @@ For example, for the configuration of the Cartpole camera depth environment: :emphasize-lines: 16 If the user were to modify the width of the camera, i.e. ``env.tiled_camera.width=128``, then the parameter -``env.num_observations=10240`` (1*80*128) must be updated and given as input as well. +``env.observation_space=[80,128,1]`` must be updated and given as input as well. Similarly, the ``__post_init__`` method is not updated with the command line inputs. In the ``LocomotionVelocityRoughEnvCfg``, for example, the post init update is as follows: diff --git a/docs/source/migration/migrating_from_isaacgymenvs.rst b/docs/source/migration/migrating_from_isaacgymenvs.rst index c903d83c3d..2073854d32 100644 --- a/docs/source/migration/migrating_from_isaacgymenvs.rst +++ b/docs/source/migration/migrating_from_isaacgymenvs.rst @@ -45,9 +45,9 @@ Below is an example skeleton of a task config class: # env decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 # task-specific parameters ... @@ -135,9 +135,9 @@ The following parameters must be set for each environment config: decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 Note that the maximum episode length parameter (now ``episode_length_s``) is in seconds instead of steps as it was in IsaacGymEnvs. To convert between step count to seconds, use the equation: @@ -569,9 +569,9 @@ Task Config | | decimation = 2 | | asset: | episode_length_s = 5.0 | | assetRoot: "../../assets" | action_scale = 100.0 # [N] | -| assetFileName: "urdf/cartpole.urdf" | num_actions = 1 | -| | num_observations = 4 | -| enableCameraSensors: False | num_states = 0 | +| assetFileName: "urdf/cartpole.urdf" | action_space = 1 | +| | observation_space = 4 | +| enableCameraSensors: False | state_space = 0 | | | # reset | | sim: | max_cart_pos = 3.0 | | dt: 0.0166 # 1/60 s | initial_pole_angle_range = [-0.25, 0.25] | diff --git a/docs/source/migration/migrating_from_omniisaacgymenvs.rst b/docs/source/migration/migrating_from_omniisaacgymenvs.rst index 50f9d5b9d6..cbda1e8d45 100644 --- a/docs/source/migration/migrating_from_omniisaacgymenvs.rst +++ b/docs/source/migration/migrating_from_omniisaacgymenvs.rst @@ -46,9 +46,9 @@ Below is an example skeleton of a task config class: # env decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 # task-specific parameters ... 
@@ -158,9 +158,9 @@ The following parameters must be set for each environment config: decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 RL Config Setup @@ -501,9 +501,9 @@ Task config in Isaac Lab can be split into the main task configuration class and | clipObservations: 5.0 | decimation = 2 | | clipActions: 1.0 | episode_length_s = 5.0 | | controlFrequencyInv: 2 # 60 Hz | action_scale = 100.0 # [N] | -| | num_actions = 1 | -| sim: | num_observations = 4 | -| | num_states = 0 | +| | action_space = 1 | +| sim: | observation_space = 4 | +| | state_space = 0 | | dt: 0.0083 # 1/120 s | # reset | | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} | max_cart_pos = 3.0 | | gravity: [0.0, 0.0, -9.81] | initial_pole_angle_range = [-0.25, 0.25] | diff --git a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py index 8073886840..215ef3bd4f 100644 --- a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py +++ b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py @@ -28,8 +28,8 @@ # [end-h1_env-import] # [start-h1_env-spaces] -num_actions = 19 -num_observations = 69 +action_space = 19 +observation_space = 69 # [end-h1_env-spaces] # [start-h1_env-robot] diff --git a/docs/source/tutorials/03_envs/create_direct_rl_env.rst b/docs/source/tutorials/03_envs/create_direct_rl_env.rst index ab5136106a..a4b945be9d 100644 --- a/docs/source/tutorials/03_envs/create_direct_rl_env.rst +++ b/docs/source/tutorials/03_envs/create_direct_rl_env.rst @@ -48,9 +48,9 @@ config should define the number of actions and observations for the environment. @configclass class CartpoleEnvCfg(DirectRLEnvCfg): ... - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 The config class can also be used to define task-specific attributes, such as scaling for reward terms and thresholds for reset conditions. diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml index 517ac00610..a633e64fb8 100644 --- a/source/extensions/omni.isaac.lab/config/extension.toml +++ b/source/extensions/omni.isaac.lab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.25.1" +version = "0.25.2" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst index e78abdb068..12e4b5b45a 100644 --- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst @@ -1,6 +1,25 @@ Changelog --------- +0.25.2 (2024-10-16) +~~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added support for different Gymnasium spaces (``Box``, ``Discrete``, ``MultiDiscrete``, ``Tuple`` and ``Dict``) + to define observation, action and state spaces in the direct workflow. +* Added :meth:`sample_space` to environment utils to sample supported spaces where data containers are torch tensors. + +Changed +^^^^^^^ + +* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectRLEnvCfg` as deprecated + in favor of :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively. 
+* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectMARLEnvCfg` as deprecated + in favor of :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively. + + 0.25.1 (2024-10-10) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py index 97701e50cc..667c0b5049 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py @@ -52,4 +52,4 @@ from .manager_based_env_cfg import ManagerBasedEnvCfg from .manager_based_rl_env import ManagerBasedRLEnv from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg -from .utils import multi_agent_to_single_agent, multi_agent_with_one_agent +from .utils.marl import multi_agent_to_single_agent, multi_agent_with_one_agent diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py index d0c99f8ad8..7d6b02d309 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py @@ -5,6 +5,7 @@ from __future__ import annotations +import gymnasium as gym import torch from typing import Dict, Literal, TypeVar @@ -62,6 +63,9 @@ class ViewerCfg: # Types. ## +SpaceType = TypeVar("SpaceType", gym.spaces.Space, int, set, tuple, list, dict) +"""A sentinel object to indicate a valid space type to specify states, observations and actions.""" + VecEnvObs = Dict[str, torch.Tensor | Dict[str, torch.Tensor]] """Observation returned by the environment. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py index f58a80dd47..5bcedb0591 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py @@ -14,6 +14,7 @@ import weakref from abc import abstractmethod from collections.abc import Sequence +from dataclasses import MISSING from typing import Any, ClassVar import omni.isaac.core.utils.torch as torch_utils @@ -30,6 +31,7 @@ from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType from .direct_marl_env_cfg import DirectMARLEnvCfg from .ui import ViewportCameraController +from .utils.spaces import sample_space, spec_to_gym_space class DirectMARLEnv: @@ -164,10 +166,6 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar # -- init buffers self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) - self.actions = { - agent: torch.zeros(self.num_envs, self.cfg.num_actions[agent], device=self.sim.device) - for agent in self.cfg.possible_agents - } # setup the observation, state and action spaces self._configure_env_spaces() @@ -406,16 +404,19 @@ def state(self) -> StateType | None: """Returns the state for the environment. The state-space is used for centralized training or asymmetric actor-critic architectures. It is configured - using the :attr:`DirectMARLEnvCfg.num_states` parameter. + using the :attr:`DirectMARLEnvCfg.state_space` parameter. Returns: - The states for the environment, or None if :attr:`DirectMARLEnvCfg.num_states` parameter is zero. 
+ The states for the environment, or None if :attr:`DirectMARLEnvCfg.state_space` parameter is zero. """ - if not self.cfg.num_states: + if not self.cfg.state_space: return None # concatenate and return the observations as state - if self.cfg.num_states < 0: - self.state_buf = torch.cat([self.obs_dict[agent] for agent in self.cfg.possible_agents], dim=-1) + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces + if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0: + self.state_buf = torch.cat( + [self.obs_dict[agent].reshape(self.num_envs, -1) for agent in self.cfg.possible_agents], dim=-1 + ) # compute and return custom environment state else: self.state_buf = self._get_states() @@ -568,25 +569,45 @@ def _configure_env_spaces(self): self.agents = self.cfg.possible_agents self.possible_agents = self.cfg.possible_agents + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + omni.log.warn("DirectMARLEnvCfg.num_actions is deprecated. Use DirectMARLEnvCfg.action_spaces instead.") + if isinstance(self.cfg.action_spaces, type(MISSING)): + self.cfg.action_spaces = self.cfg.num_actions + if self.cfg.num_observations is not None: + omni.log.warn( + "DirectMARLEnvCfg.num_observations is deprecated. Use DirectMARLEnvCfg.observation_spaces instead." + ) + if isinstance(self.cfg.observation_spaces, type(MISSING)): + self.cfg.observation_spaces = self.cfg.num_observations + if self.cfg.num_states is not None: + omni.log.warn("DirectMARLEnvCfg.num_states is deprecated. Use DirectMARLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states + # set up observation and action spaces self.observation_spaces = { - agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_observations[agent],)) - for agent in self.cfg.possible_agents + agent: spec_to_gym_space(self.cfg.observation_spaces[agent]) for agent in self.cfg.possible_agents } self.action_spaces = { - agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_actions[agent],)) - for agent in self.cfg.possible_agents + agent: spec_to_gym_space(self.cfg.action_spaces[agent]) for agent in self.cfg.possible_agents } # set up state space - if not self.cfg.num_states: + if not self.cfg.state_space: self.state_space = None - if self.cfg.num_states < 0: - self.state_space = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.cfg.num_observations.values()),) + if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0: + self.state_space = gym.spaces.flatten_space( + gym.spaces.Tuple([self.observation_spaces[agent] for agent in self.cfg.possible_agents]) ) else: - self.state_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_states,)) + self.state_space = spec_to_gym_space(self.cfg.state_space) + + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = { + agent: sample_space(self.action_spaces[agent], self.sim.device, batch_size=self.num_envs, fill_value=0) + for agent in self.cfg.possible_agents + } def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. @@ -664,8 +685,8 @@ def _get_observations(self) -> dict[AgentID, ObsType]: def _get_states(self) -> StateType: """Compute and return the states for the environment. 
- This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.num_states` - parameter is greater than zero. + This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.state_space` + parameter is not a number less than or equal to zero. Returns: The states for the environment. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py index 3dcf364f5c..40ecb64297 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py @@ -10,7 +10,7 @@ from omni.isaac.lab.utils import configclass from omni.isaac.lab.utils.noise import NoiseModelCfg -from .common import AgentID, ViewerCfg +from .common import AgentID, SpaceType, ViewerCfg from .ui import BaseEnvWindow @@ -104,11 +104,39 @@ class DirectMARLEnvCfg: Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details. """ - num_observations: dict[AgentID, int] = MISSING - """The dimension of the observation space from each agent.""" + observation_spaces: dict[AgentID, SpaceType] = MISSING + """Observation space definition for each agent. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ - num_states: int = MISSING - """The dimension of the state space from each environment instance. + num_observations: dict[AgentID, int] | None = None + """The dimension of the observation space for each agent. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.observation_spaces` instead. + """ + + state_space: SpaceType = MISSING + """State space definition. The following values are supported: @@ -116,6 +144,33 @@ class DirectMARLEnvCfg: * 0: No state-space will be constructed (`state_space` is None). This is useful to save computational resources when the algorithm to be trained does not need it. * greater than 0: Custom state-space dimension to be provided by the task implementation. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. 
list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_states: int | None = None + """The dimension of the state space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.state_space` instead. """ observation_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None @@ -124,8 +179,36 @@ class DirectMARLEnvCfg: Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details. """ - num_actions: dict[AgentID, int] = MISSING - """The dimension of the action space for each agent.""" + action_spaces: dict[AgentID, SpaceType] = MISSING + """Action space definition for each agent. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_actions: dict[AgentID, int] | None = None + """The dimension of the action space for each agent. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.action_spaces` instead. + """ action_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None """The noise model applied to the actions provided to the environment. Default is None, which means no noise is added. 
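The same space-specification convention is shared by the single-agent and multi-agent configs: ``DirectRLEnvCfg`` takes one specification per attribute, while ``DirectMARLEnvCfg`` takes one specification per agent. A minimal sketch of the supported Python shorthand follows; the config class name and field values are illustrative only, and the other required fields (e.g. ``sim``, ``scene``, ``decimation``, ``episode_length_s``) are omitted:

    from omni.isaac.lab.envs import DirectRLEnvCfg
    from omni.isaac.lab.utils import configclass


    @configclass
    class ExampleSpacesEnvCfg(DirectRLEnvCfg):
        # integer -> unbounded Box space, i.e. gym.spaces.Box(low=-inf, high=inf, shape=(7,))
        action_space = 7
        # dict -> gym.spaces.Dict; list of ints -> Box, single-element set -> Discrete,
        # list of single-element sets -> MultiDiscrete
        observation_space = {"rgb": [64, 64, 3], "gripper": {2}, "mode": [{2}, {5}]}
        # 0 -> no state space is constructed (no asymmetric actor-critic)
        state_space = 0
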
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py index 5663977fda..2a0e88cb63 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py @@ -14,6 +14,7 @@ import weakref from abc import abstractmethod from collections.abc import Sequence +from dataclasses import MISSING from typing import Any, ClassVar import omni.isaac.core.utils.torch as torch_utils @@ -30,6 +31,7 @@ from .common import VecEnvObs, VecEnvStepReturn from .direct_rl_env_cfg import DirectRLEnvCfg from .ui import ViewportCameraController +from .utils.spaces import sample_space, spec_to_gym_space class DirectRLEnv(gym.Env): @@ -171,7 +173,6 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) self.reset_time_outs = torch.zeros_like(self.reset_terminated) self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) - self.actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.sim.device) # setup the action and observation spaces for Gym self._configure_gym_env_spaces() @@ -507,27 +508,40 @@ def set_debug_vis(self, debug_vis: bool) -> bool: def _configure_gym_env_spaces(self): """Configure the action and observation spaces for the Gym environment.""" - # observation space (unbounded since we don't impose any limits) - self.num_actions = self.cfg.num_actions - self.num_observations = self.cfg.num_observations - self.num_states = self.cfg.num_states + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + omni.log.warn("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.") + if isinstance(self.cfg.action_space, type(MISSING)): + self.cfg.action_space = self.cfg.num_actions + if self.cfg.num_observations is not None: + omni.log.warn( + "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead." + ) + if isinstance(self.cfg.observation_space, type(MISSING)): + self.cfg.observation_space = self.cfg.num_observations + if self.cfg.num_states is not None: + omni.log.warn("DirectRLEnvCfg.num_states is deprecated. 
Use DirectRLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states # set up spaces self.single_observation_space = gym.spaces.Dict() - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(self.num_observations,) - ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) + self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space) + self.single_action_space = spec_to_gym_space(self.cfg.action_space) # batch the spaces for vectorized environments self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs) self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs) # optional state space for asymmetric actor-critic architectures - if self.num_states > 0: - self.single_observation_space["critic"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_states,)) + self.state_space = None + if self.cfg.state_space > 0: + self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space) self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs) + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0) + def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. @@ -601,7 +615,7 @@ def _get_states(self) -> VecEnvObs | None: """Compute and return the states for the environment. The state-space is used for asymmetric actor-critic architectures. It is configured - using the :attr:`DirectRLEnvCfg.num_states` parameter. + using the :attr:`DirectRLEnvCfg.state_space` parameter. Returns: The states for the environment. If the environment does not have a state-space, the function diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py index ad8c6c18c8..e86b366cc2 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py @@ -10,7 +10,7 @@ from omni.isaac.lab.utils import configclass from omni.isaac.lab.utils.noise import NoiseModelCfg -from .common import ViewerCfg +from .common import SpaceType, ViewerCfg from .ui import BaseEnvWindow @@ -104,13 +104,68 @@ class DirectRLEnvCfg: Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details. """ - num_observations: int = MISSING - """The dimension of the observation space from each environment instance.""" + observation_space: SpaceType = MISSING + """Observation space definition. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. 
list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_observations: int | None = None + """The dimension of the observation space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.observation_space` instead. + """ - num_states: int = 0 - """The dimension of the state-space from each environment instance. Default is 0, which means no state-space is defined. + state_space: SpaceType = MISSING + """State space definition. This is useful for asymmetric actor-critic and defines the observation space for the critic. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_states: int | None = None + """The dimension of the state-space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.state_space` instead. """ observation_noise_model: NoiseModelCfg | None = None @@ -119,8 +174,36 @@ class DirectRLEnvCfg: Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details. """ - num_actions: int = MISSING - """The dimension of the action space for each environment.""" + action_space: SpaceType = MISSING + """Action space definition. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_actions: int | None = None + """The dimension of the action space for each environment. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.action_space` instead. + """ action_noise_model: NoiseModelCfg | None = None """The noise model applied to the actions provided to the environment. 
Default is None, which means no noise is added. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py new file mode 100644 index 0000000000..913e1edb90 --- /dev/null +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment utils.""" diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py similarity index 76% rename from source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py rename to source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py index cacbdeaf81..46519048ae 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py @@ -9,9 +9,9 @@ import torch from typing import Any -from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn -from .direct_marl_env import DirectMARLEnv -from .direct_rl_env import DirectRLEnv +from ..common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn +from ..direct_marl_env import DirectMARLEnv +from ..direct_rl_env import DirectRLEnv def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = False) -> DirectRLEnv: @@ -39,7 +39,7 @@ def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = Raises: AssertionError: If the environment state cannot be used as observation since it was explicitly defined - as unconstructed (:attr:`DirectMARLEnvCfg.num_states`). + as unconstructed (:attr:`DirectMARLEnvCfg.state_space`). 
""" class Env(DirectRLEnv): @@ -49,7 +49,7 @@ def __init__(self, env: DirectMARLEnv) -> None: # check if it is possible to use the multi-agent environment state as single-agent observation self._state_as_observation = state_as_observation if self._state_as_observation: - assert self.env.cfg.num_states != 0, ( + assert self.env.cfg.state_space != 0, ( "The environment state cannot be used as observation since it was explicitly defined as" " unconstructed" ) @@ -58,18 +58,17 @@ def __init__(self, env: DirectMARLEnv) -> None: self.cfg = self.env.cfg self.sim = self.env.sim self.scene = self.env.scene - self.num_actions = sum(self.env.cfg.num_actions.values()) - self.num_observations = sum(self.env.cfg.num_observations.values()) - self.num_states = self.env.cfg.num_states self.single_observation_space = gym.spaces.Dict() if self._state_as_observation: self.single_observation_space["policy"] = self.env.state_space else: - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(self.num_observations,) + self.single_observation_space["policy"] = gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents]) ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) + self.single_action_space = gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents]) + ) # batch the spaces for vectorized environments self.observation_space = gym.vector.utils.batch_space( @@ -84,18 +83,25 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) if self._state_as_observation: obs = {"policy": self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces else: - obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + "policy": torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } return obs, extras def step(self, action: torch.Tensor) -> VecEnvStepReturn: # split single-agent actions to build the multi-agent ones + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces index = 0 _actions = {} for agent in self.env.possible_agents: - _actions[agent] = action[:, index : index + self.env.cfg.num_actions[agent]] - index += self.env.cfg.num_actions[agent] + delta = gym.spaces.flatdim(self.env.action_spaces[agent]) + _actions[agent] = action[:, index : index + delta] + index += delta # step the environment obs, rewards, terminated, time_outs, extras = self.env.step(_actions) @@ -104,8 +110,13 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: if self._state_as_observation: obs = {"policy": self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. 
Fix it to support composite spaces else: - obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + "policy": torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } # process environment outputs to return single-agent data rewards = sum(rewards.values()) @@ -147,7 +158,7 @@ def multi_agent_with_one_agent(env: DirectMARLEnv, state_as_observation: bool = Raises: AssertionError: If the environment state cannot be used as observation since it was explicitly defined - as unconstructed (:attr:`DirectMARLEnvCfg.num_states`). + as unconstructed (:attr:`DirectMARLEnvCfg.state_space`). """ class Env(DirectMARLEnv): @@ -157,7 +168,7 @@ def __init__(self, env: DirectMARLEnv) -> None: # check if it is possible to use the multi-agent environment state as agent observation self._state_as_observation = state_as_observation if self._state_as_observation: - assert self.env.cfg.num_states != 0, ( + assert self.env.cfg.state_space != 0, ( "The environment state cannot be used as observation since it was explicitly defined as" " unconstructed" ) @@ -170,13 +181,13 @@ def __init__(self, env: DirectMARLEnv) -> None: self._exported_observation_spaces = {self._agent_id: self.env.state_space} else: self._exported_observation_spaces = { - self._agent_id: gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_observations.values()),) + self._agent_id: gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents]) ) } self._exported_action_spaces = { - self._agent_id: gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_actions.values()),) + self._agent_id: gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents]) ) } @@ -208,18 +219,25 @@ def reset( if self._state_as_observation: obs = {self._agent_id: self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces else: - obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + self._agent_id: torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } return obs, extras def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn: # split agent actions to build the multi-agent ones + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces index = 0 _actions = {} for agent in self.env.possible_agents: - _actions[agent] = actions[self._agent_id][:, index : index + self.env.cfg.num_actions[agent]] - index += self.env.cfg.num_actions[agent] + delta = gym.spaces.flatdim(self.env.action_spaces[agent]) + _actions[agent] = actions[self._agent_id][:, index : index + delta] + index += delta # step the environment obs, rewards, terminated, time_outs, extras = self.env.step(_actions) @@ -228,8 +246,13 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn: if self._state_as_observation: obs = {self._agent_id: self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. 
Fix it to support composite spaces else: - obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + self._agent_id: torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } # process environment outputs to return agent data rewards = {self._agent_id: sum(rewards.values())} diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py new file mode 100644 index 0000000000..8604392ec6 --- /dev/null +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py @@ -0,0 +1,92 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym +import numpy as np +import torch +from typing import Any + +from ..common import SpaceType + + +def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space: + """Generate an appropriate Gymnasium space according to the given space specification. + + Args: + spec: Space specification. + + Returns: + Gymnasium space. + + Raises: + ValueError: If the given space specification is not valid/supported. + """ + if isinstance(spec, gym.spaces.Space): + return spec + # fundamental spaces + # Box + elif isinstance(spec, int): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,)) + elif isinstance(spec, list) and all(isinstance(x, int) for x in spec): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec) + # Discrete + elif isinstance(spec, set) and len(spec) == 1: + return gym.spaces.Discrete(n=next(iter(spec))) + # MultiDiscrete + elif isinstance(spec, list) and all(isinstance(x, set) and len(x) == 1 for x in spec): + return gym.spaces.MultiDiscrete(nvec=[next(iter(x)) for x in spec]) + # composite spaces + # Tuple + elif isinstance(spec, tuple): + return gym.spaces.Tuple([spec_to_gym_space(x) for x in spec]) + # Dict + elif isinstance(spec, dict): + return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()}) + raise ValueError(f"Unsupported space specification: {spec}") + + +def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any: + """Sample a Gymnasium space where the data container are PyTorch tensors. + + Args: + space: Gymnasium space. + device: The device where the tensor should be created. + batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it. + fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values. + + Returns: + Tensorized sampled space. 
+ """ + + def tensorize(s, x): + if isinstance(s, gym.spaces.Box): + tensor = torch.tensor(x, device=device, dtype=torch.float32).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(fill_value) + return tensor + elif isinstance(s, gym.spaces.Discrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(x, np.number) or type(x) in [int, float]: + tensor = torch.tensor([x], device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.MultiDiscrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.Dict): + return {k: tensorize(_s, x[k]) for k, _s in s.items()} + elif isinstance(s, gym.spaces.Tuple): + return tuple([tensorize(_s, v) for _s, v in zip(s, x)]) + + sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample() + return tensorize(space, sample) diff --git a/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py new file mode 100644 index 0000000000..274f0de650 --- /dev/null +++ b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py @@ -0,0 +1,122 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# ignore private usage of variables warning +# pyright: reportPrivateUsage=none + +from __future__ import annotations + +"""Launch Isaac Sim Simulator first.""" + +from omni.isaac.lab.app import AppLauncher, run_tests + +# Can set this to False to see the GUI for debugging +HEADLESS = True + +# launch omniverse app +app_launcher = AppLauncher(headless=HEADLESS) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import numpy as np +import torch +import unittest +from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple + +from omni.isaac.lab.envs.utils.spaces import sample_space, spec_to_gym_space + + +class TestSpacesUtils(unittest.TestCase): + """Test for spaces utils' functions""" + + """ + Tests + """ + + def test_spec_to_gym_space(self): + # fundamental spaces + # Box + space = spec_to_gym_space(1) + self.assertIsInstance(space, Box) + self.assertEqual(space.shape, (1,)) + space = spec_to_gym_space([1, 2, 3, 4, 5]) + self.assertIsInstance(space, Box) + self.assertEqual(space.shape, (1, 2, 3, 4, 5)) + space = spec_to_gym_space(Box(low=-1.0, high=1.0, shape=(1, 2))) + self.assertIsInstance(space, Box) + # Discrete + space = spec_to_gym_space({2}) + self.assertIsInstance(space, Discrete) + self.assertEqual(space.n, 2) + space = spec_to_gym_space(Discrete(2)) + self.assertIsInstance(space, Discrete) + # MultiDiscrete + space = spec_to_gym_space([{1}, {2}, {3}]) + self.assertIsInstance(space, MultiDiscrete) + self.assertEqual(space.nvec.shape, (3,)) + space = spec_to_gym_space(MultiDiscrete(np.array([1, 2, 3]))) + self.assertIsInstance(space, MultiDiscrete) + # composite spaces + # Tuple + space = spec_to_gym_space(([1, 2, 3, 4, 5], {2}, [{1}, {2}, {3}])) + self.assertIsInstance(space, Tuple) + self.assertEqual(len(space), 3) + self.assertIsInstance(space[0], Box) + 
self.assertIsInstance(space[1], Discrete) + self.assertIsInstance(space[2], MultiDiscrete) + space = spec_to_gym_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2)))) + self.assertIsInstance(space, Tuple) + # Dict + space = spec_to_gym_space({"box": [1, 2, 3, 4, 5], "discrete": {2}, "multi_discrete": [{1}, {2}, {3}]}) + self.assertIsInstance(space, Dict) + self.assertEqual(len(space), 3) + self.assertIsInstance(space["box"], Box) + self.assertIsInstance(space["discrete"], Discrete) + self.assertIsInstance(space["multi_discrete"], MultiDiscrete) + space = spec_to_gym_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)})) + self.assertIsInstance(space, Dict) + + def test_sample_space(self): + device = "cpu" + # fundamental spaces + # Box + sample = sample_space(Box(low=-1.0, high=1.0, shape=(1, 2)), device, batch_size=1) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=1) + # Discrete + sample = sample_space(Discrete(2), device, batch_size=2) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=2) + # MultiDiscrete + sample = sample_space(MultiDiscrete(np.array([1, 2, 3])), device, batch_size=3) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=3) + # composite spaces + # Tuple + sample = sample_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))), device, batch_size=4) + self.assertIsInstance(sample, (tuple, list)) + self._check_tensorized(sample, batch_size=4) + # Dict + sample = sample_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)}), device, batch_size=5) + self.assertIsInstance(sample, dict) + self._check_tensorized(sample, batch_size=5) + + """ + Helper functions. + """ + + def _check_tensorized(self, sample, batch_size): + if isinstance(sample, (tuple, list)): + list(map(self._check_tensorized, sample, [batch_size] * len(sample))) + elif isinstance(sample, dict): + list(map(self._check_tensorized, sample.values(), [batch_size] * len(sample))) + else: + self.assertIsInstance(sample, torch.Tensor) + self.assertEqual(sample.shape[0], batch_size) + + +if __name__ == "__main__": + run_tests() diff --git a/source/extensions/omni.isaac.lab_tasks/config/extension.toml b/source/extensions/omni.isaac.lab_tasks/config/extension.toml index 89ca646936..a6ecb7a56c 100644 --- a/source/extensions/omni.isaac.lab_tasks/config/extension.toml +++ b/source/extensions/omni.isaac.lab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.10.5" +version = "0.10.7" # Description title = "Isaac Lab Environments" diff --git a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst index 2614630bd7..b3ba0a77fd 100644 --- a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst @@ -1,11 +1,24 @@ Changelog --------- +0.10.7 (2024-10-02) +~~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Replace deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in single-agent direct tasks + by :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively. +* Replace deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in multi-agent direct tasks + by :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively. 
+ + 0.10.6 (2024-09-25) ~~~~~~~~~~~~~~~~~~~ Added ^^^^^ + * Added ``Isaac-Cartpole-RGB-Camera-v0`` and ``Isaac-Cartpole-Depth-Camera-v0`` manager based camera cartpole environments. diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py index b83b6782a6..b5c53a91d3 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py @@ -22,9 +22,9 @@ class AllegroHandEnvCfg(DirectRLEnvCfg): # env decimation = 4 episode_length_s = 10.0 - num_actions = 16 - num_observations = 124 # (full) - num_states = 0 + action_space = 16 + observation_space = 124 # (full) + state_space = 0 asymmetric_obs = False obs_type = "full" # simulation diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py index 8bf6d6bcc9..42f57127ee 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py @@ -24,9 +24,9 @@ class AntEnvCfg(DirectRLEnvCfg): episode_length_s = 15.0 decimation = 2 action_scale = 0.5 - num_actions = 8 - num_observations = 36 - num_states = 0 + action_space = 8 + observation_space = 36 + state_space = 0 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py index 5490bb0dd3..ca1f61c54a 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py @@ -5,6 +5,7 @@ from __future__ import annotations +import gymnasium as gym import torch import omni.isaac.lab.envs.mdp as mdp @@ -59,9 +60,9 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg): episode_length_s = 20.0 decimation = 4 action_scale = 0.5 - num_actions = 12 - num_observations = 48 - num_states = 0 + action_space = 12 + observation_space = 48 + state_space = 0 # simulation sim: SimulationCfg = SimulationCfg( @@ -118,7 +119,7 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg): @configclass class AnymalCRoughEnvCfg(AnymalCFlatEnvCfg): # env - num_observations = 235 + observation_space = 235 terrain = TerrainImporterCfg( prim_path="/World/ground", @@ -160,8 +161,10 @@ def __init__(self, cfg: AnymalCFlatEnvCfg | AnymalCRoughEnvCfg, render_mode: str super().__init__(cfg, render_mode, **kwargs) # Joint position command (deviation from default joint positions) - self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device) - self._previous_actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device) + self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device) + self._previous_actions = torch.zeros( + self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device + ) # X/Y linear velocity and yaw angular velocity commands self._commands = torch.zeros(self.num_envs, 3, device=self.device) diff --git 
a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py index 0b606fe899..ad8c616940 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py @@ -27,9 +27,9 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg): decimation = 2 episode_length_s = 5.0 possible_agents = ["cart", "pendulum"] - num_actions = {"cart": 1, "pendulum": 1} - num_observations = {"cart": 4, "pendulum": 3} - num_states = -1 + action_spaces = {"cart": 1, "pendulum": 1} + observation_spaces = {"cart": 4, "pendulum": 3} + state_space = -1 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py index b2a1b1e303..dc7db07030 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py @@ -5,9 +5,7 @@ from __future__ import annotations -import gymnasium as gym import math -import numpy as np import torch from collections.abc import Sequence @@ -29,9 +27,6 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg): decimation = 2 episode_length_s = 5.0 action_scale = 100.0 # [N] - num_actions = 1 - num_channels = 3 - num_states = 0 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) @@ -52,9 +47,13 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg): width=80, height=80, ) - num_observations = num_channels * tiled_camera.height * tiled_camera.width write_image_to_file = False + # spaces + action_space = 1 + state_space = 0 + observation_space = [tiled_camera.height, tiled_camera.width, 3] + # change viewer settings viewer = ViewerCfg(eye=(20.0, 20.0, 20.0)) @@ -87,9 +86,8 @@ class CartpoleDepthCameraEnvCfg(CartpoleRGBCameraEnvCfg): height=80, ) - # env - num_channels = 1 - num_observations = num_channels * tiled_camera.height * tiled_camera.width + # spaces + observation_space = [tiled_camera.height, tiled_camera.width, 1] class CartpoleCameraEnv(DirectRLEnv): @@ -118,35 +116,6 @@ def close(self): """Cleanup for the environment.""" super().close() - def _configure_gym_env_spaces(self): - """Configure the action and observation spaces for the Gym environment.""" - # observation space (unbounded since we don't impose any limits) - self.num_actions = self.cfg.num_actions - self.num_observations = self.cfg.num_observations - self.num_states = self.cfg.num_states - - # set up spaces - self.single_observation_space = gym.spaces.Dict() - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels), - ) - if self.num_states > 0: - self.single_observation_space["critic"] = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels), - ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) - - # batch the spaces for 
vectorized environments
-        self.observation_space = gym.vector.utils.batch_space(self.single_observation_space, self.num_envs)
-        self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
-
-        # RL specifics
-        self.actions = torch.zeros(self.num_envs, self.num_actions, device=self.sim.device)
-
     def _setup_scene(self):
         """Setup the scene with the cartpole and camera."""
         self._cartpole = Articulation(self.cfg.robot_cfg)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
index 44926e95f9..534fb26443 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
@@ -27,9 +27,9 @@ class CartpoleEnvCfg(DirectRLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     action_scale = 100.0  # [N]
-    num_actions = 1
-    num_observations = 4
-    num_states = 0
+    action_space = 1
+    observation_space = 4
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
index 4eb01953fe..3a6a480ed0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
@@ -28,9 +28,9 @@ class FrankaCabinetEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 8.3333  # 500 timesteps
     decimation = 2
-    num_actions = 9
-    num_observations = 23
-    num_states = 0
+    action_space = 9
+    observation_space = 23
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
index bfaf8f8190..2a4d330e6a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
@@ -24,9 +24,9 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
     episode_length_s = 15.0
     decimation = 2
     action_scale = 1.0
-    num_actions = 21
-    num_observations = 75
-    num_states = 0
+    action_space = 21
+    observation_space = 75
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
index c6df659ec6..97156618f1 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
@@ -5,6 +5,7 @@

 from __future__ import annotations

+import gymnasium as gym
 import torch

 import omni.isaac.lab.sim as sim_utils
@@ -50,9 +51,9 @@ class QuadcopterEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 10.0
     decimation = 2
-    num_actions = 4
-    num_observations = 12
-    num_states = 0
+    action_space = 4
+    observation_space = 12
+    state_space = 0
     debug_vis = True

     ui_window_class_type = QuadcopterEnvWindow
@@ -105,7 +106,7 @@ def __init__(self, cfg: QuadcopterEnvCfg, render_mode: str | None = None, **kwar
         super().__init__(cfg, render_mode, **kwargs)

         # Total thrust and moment applied to the base of the quadcopter
-        self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+        self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
         self._thrust = torch.zeros(self.num_envs, 1, 3, device=self.device)
         self._moment = torch.zeros(self.num_envs, 1, 3, device=self.device)
         # Goal position
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
index f4b8407296..af88124792 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
@@ -119,9 +119,9 @@ class ShadowHandEnvCfg(DirectRLEnvCfg):
     # env
     decimation = 2
     episode_length_s = 10.0
-    num_actions = 20
-    num_observations = 157  # (full)
-    num_states = 0
+    action_space = 20
+    observation_space = 157  # (full)
+    state_space = 0
     asymmetric_obs = False
     obs_type = "full"

@@ -232,9 +232,9 @@ class ShadowHandOpenAIEnvCfg(ShadowHandEnvCfg):
     # env
     decimation = 3
     episode_length_s = 8.0
-    num_actions = 20
-    num_observations = 42
-    num_states = 187
+    action_space = 20
+    observation_space = 42
+    state_space = 187
     asymmetric_obs = True
     obs_type = "openai"
     # simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
index b025bfb052..492074d8a9 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
@@ -48,8 +48,8 @@ class ShadowHandVisionEnvCfg(ShadowHandEnvCfg):
     feature_extractor = FeatureExtractorCfg()

     # env
-    num_observations = 164 + 27  # state observation + vision CNN embedding
-    num_states = 187 + 27  # asymettric states + vision CNN embedding
+    observation_space = 164 + 27  # state observation + vision CNN embedding
+    state_space = 187 + 27  # asymettric states + vision CNN embedding


 @configclass
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
index d6dbb3d6a2..d3a7c33b3f 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
@@ -118,9 +118,9 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
     decimation = 2
     episode_length_s = 7.5
     possible_agents = ["right_hand", "left_hand"]
-    num_actions = {"right_hand": 20, "left_hand": 20}
-    num_observations = {"right_hand": 157, "left_hand": 157}
-    num_states = 290
+    action_spaces = {"right_hand": 20, "left_hand": 20}
+    observation_spaces = {"right_hand": 157, "left_hand": 157}
+    state_space = 290

     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
index 0badd08c31..0dedef9ef0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
@@ -70,19 +70,19 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
         if hasattr(self.unwrapped, "action_manager"):
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
-            self.num_actions = self.unwrapped.num_actions
+            self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
         if hasattr(self.unwrapped, "observation_manager"):
             self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
         else:
-            self.num_obs = self.unwrapped.num_observations
+            self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
         # -- privileged observations
         if (
             hasattr(self.unwrapped, "observation_manager")
             and "critic" in self.unwrapped.observation_manager.group_obs_dim
         ):
             self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
-        elif hasattr(self.unwrapped, "num_states"):
-            self.num_privileged_obs = self.unwrapped.num_states
+        elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
+            self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
         else:
             self.num_privileged_obs = 0
         # reset at the start since the RSL-RL runner does not call reset
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
index 9e92e26156..993b776a81 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
@@ -22,6 +22,7 @@
 import omni.usd

 from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space

 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -108,12 +109,12 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
-                actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
+                # sample actions according to the defined space
+                actions = sample_space(env.single_action_space, device=env.unwrapped.device, batch_size=num_envs)
                 # apply actions
                 transition = env.step(actions)
                 # check signals
-                for data in transition:
+                for data in transition[:-1]:  # exclude info
                     self.assertTrue(self._check_valid_tensor(data), msg=f"Invalid data: {data}")

         # close the environment
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")

diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
index 19fcd88936..2f543a84e3 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
@@ -21,6 +21,7 @@
 import omni.usd

 from omni.isaac.lab.envs import DirectMARLEnv, DirectMARLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space

 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -104,9 +105,9 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
+                # sample actions according to the defined space
                 actions = {
-                    agent: 2 * torch.rand(env.action_space(agent).shape, device=env.unwrapped.device) - 1
+                    agent: sample_space(env.action_spaces[agent], device=env.unwrapped.device)
                     for agent in env.unwrapped.possible_agents
                 }
                 # apply actions
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")