Support other gymnasium spaces in Direct workflow #1117

Merged: 26 commits (Oct 17, 2024)
Changes from 22 commits
Commits (26)
338e17d
Add new space attributes and deprecation notes for the old ones
Toni-SM Sep 30, 2024
48b59ed
Show deprecation message and overwrite configuration
Toni-SM Sep 30, 2024
b7da9f8
Add a utility to generate Gymnasium spaces given a space specification
Toni-SM Sep 30, 2024
4f674d9
Set up spaces using the utility function
Toni-SM Sep 30, 2024
aaf0c36
Add sentinel data types to avoid circular import error
Toni-SM Sep 30, 2024
9b9a7b9
Comment out actions initialization
Toni-SM Sep 30, 2024
ca305d0
Update Cartpole-Camera-Direct tasks to use new space definitions
Toni-SM Sep 30, 2024
ccd5269
Improve spaces' docstrings
Toni-SM Sep 30, 2024
deffdc0
Remove num_actions, num_observations and num_states properties
Toni-SM Sep 30, 2024
1f73596
Add new space attributes and deprecation notes for the old ones in mu…
Toni-SM Sep 30, 2024
9e7b33d
Show deprecation message and overwrite configuration in multi-agent env
Toni-SM Sep 30, 2024
151d0c5
Defer utility import to avoid circular import error
Toni-SM Sep 30, 2024
42ebe5c
Remove num_actions, num_observations and num_states properties in mul…
Toni-SM Oct 1, 2024
eb098fd
Fix multi-agent state space computation implementation
Toni-SM Oct 1, 2024
9ae8dd6
Merge branch 'main' into feature/support_other_gym_spaces
Toni-SM Oct 1, 2024
9c45b5c
Sample a tensorized space
Toni-SM Oct 1, 2024
1a0d1aa
Merge branch 'main' into feature/support_other_gym_spaces
Toni-SM Oct 2, 2024
9198027
Replace deprecated properties in DirectRLEnvCfg and DirectMARLEnvCfg …
Toni-SM Oct 2, 2024
08392b0
Update test files to support different spaces
Toni-SM Oct 2, 2024
e34e860
Update extensions version and changelog
Toni-SM Oct 2, 2024
001cb71
Update deprecated properties in docs
Toni-SM Oct 2, 2024
70ca75b
Merge branch 'main' into feature/support_other_gym_spaces
Toni-SM Oct 7, 2024
6270f9e
Merge branch 'main' into feature/support_other_gym_spaces
Toni-SM Oct 16, 2024
2a3dff6
Add test for spaces utils
Toni-SM Oct 16, 2024
0649c89
Replace carb.log_warn with omni.log.warn
Toni-SM Oct 16, 2024
d405418
Move spaces and MARL utils to an internal folder
Toni-SM Oct 16, 2024
2 changes: 1 addition & 1 deletion docs/source/features/hydra.rst
@@ -115,7 +115,7 @@ For example, for the configuration of the Cartpole camera depth environment:
:emphasize-lines: 16

If the user were to modify the width of the camera, i.e. ``env.tiled_camera.width=128``, then the parameter
- ``env.num_observations=10240`` (1*80*128) must be updated and given as input as well.
+ ``env.observation_space=[80,128,1]`` must be updated and given as input as well.

Similarly, the ``__post_init__`` method is not updated with the command line inputs. In the ``LocomotionVelocityRoughEnvCfg``, for example,
the post init update is as follows:
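As a quick cross-check of the new notation, a minimal sketch of the space such a shape list denotes (assuming, as elsewhere in this PR, that a plain list is interpreted as the shape of an unbounded ``Box``):

```python
import gymnasium as gym
import numpy as np

# ``env.observation_space=[80,128,1]`` denotes an unbounded Box of that shape,
# replacing the flattened element count num_observations=10240 (80*128*1).
observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(80, 128, 1))
assert int(np.prod(observation_space.shape)) == 10240
```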
18 changes: 9 additions & 9 deletions docs/source/migration/migrating_from_isaacgymenvs.rst
@@ -45,9 +45,9 @@ Below is an example skeleton of a task config class:
# env
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
# task-specific parameters
...

@@ -135,9 +135,9 @@ The following parameters must be set for each environment config:

decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0

Note that the maximum episode length parameter (now ``episode_length_s``) is in seconds instead of steps as it was
in IsaacGymEnvs. To convert from step count to seconds, use the equation:
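(The conversion formula itself is collapsed in this view; in Isaac Lab's convention it is, assuming the standard step/decimation relation, ``episode_length_s = dt * decimation * num_steps``.)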
@@ -569,9 +569,9 @@ Task Config
| | decimation = 2 |
| asset: | episode_length_s = 5.0 |
| assetRoot: "../../assets" | action_scale = 100.0 # [N] |
- | assetFileName: "urdf/cartpole.urdf" | num_actions = 1 |
- | | num_observations = 4 |
- | enableCameraSensors: False | num_states = 0 |
+ | assetFileName: "urdf/cartpole.urdf" | action_space = 1 |
+ | | observation_space = 4 |
+ | enableCameraSensors: False | state_space = 0 |
| | # reset |
| sim: | max_cart_pos = 3.0 |
| dt: 0.0166 # 1/60 s | initial_pole_angle_range = [-0.25, 0.25] |
18 changes: 9 additions & 9 deletions docs/source/migration/migrating_from_omniisaacgymenvs.rst
@@ -46,9 +46,9 @@ Below is an example skeleton of a task config class:
# env
decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0
# task-specific parameters
...

@@ -158,9 +158,9 @@ The following parameters must be set for each environment config:

decimation = 2
episode_length_s = 5.0
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0


RL Config Setup
@@ -501,9 +501,9 @@ Task config in Isaac Lab can be split into the main task configuration class and
| clipObservations: 5.0 | decimation = 2 |
| clipActions: 1.0 | episode_length_s = 5.0 |
| controlFrequencyInv: 2 # 60 Hz | action_scale = 100.0 # [N] |
- | | num_actions = 1 |
- | sim: | num_observations = 4 |
- | | num_states = 0 |
+ | | action_space = 1 |
+ | sim: | observation_space = 4 |
+ | | state_space = 0 |
| dt: 0.0083 # 1/120 s | # reset |
| use_gpu_pipeline: ${eq:${...pipeline},"gpu"} | max_cart_pos = 3.0 |
| gravity: [0.0, 0.0, -9.81] | initial_pole_angle_range = [-0.25, 0.25] |
4 changes: 2 additions & 2 deletions docs/source/refs/snippets/tutorial_modify_direct_rl_env.py
@@ -28,8 +28,8 @@
# [end-h1_env-import]

# [start-h1_env-spaces]
- num_actions = 19
- num_observations = 69
+ action_space = 19
+ observation_space = 69
# [end-h1_env-spaces]

# [start-h1_env-robot]
6 changes: 3 additions & 3 deletions docs/source/tutorials/03_envs/create_direct_rl_env.rst
@@ -48,9 +48,9 @@ config should define the number of actions and observations for the environment.
@configclass
class CartpoleEnvCfg(DirectRLEnvCfg):
...
- num_actions = 1
- num_observations = 4
- num_states = 0
+ action_space = 1
+ observation_space = 4
+ state_space = 0

The config class can also be used to define task-specific attributes, such as scaling for reward terms
and thresholds for reset conditions.
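Beyond plain integers, the renamed attributes also accept richer specifications. A hedged sketch under the conventions this PR introduces (ints and shape lists become unbounded ``Box`` spaces, and Gymnasium spaces pass through unchanged; the class name here is hypothetical):

```python
import gymnasium as gym

from omni.isaac.lab.envs import DirectRLEnvCfg
from omni.isaac.lab.utils import configclass


@configclass
class MyCameraEnvCfg(DirectRLEnvCfg):
    ...
    action_space = 1                  # int -> Box(-inf, inf, shape=(1,))
    observation_space = [80, 128, 1]  # shape list -> Box(-inf, inf, shape=(80, 128, 1))
    state_space = gym.spaces.Dict(    # or pass any Gymnasium space directly
        {"critic": gym.spaces.Box(low=-1.0, high=1.0, shape=(12,))}
    )
```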
2 changes: 1 addition & 1 deletion source/extensions/omni.isaac.lab/config/extension.toml
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.24.19"
version = "0.24.20"

# Description
title = "Isaac Lab framework for Robot Learning"
19 changes: 19 additions & 0 deletions source/extensions/omni.isaac.lab/docs/CHANGELOG.rst
@@ -1,6 +1,25 @@
Changelog
---------

0.24.20 (2024-10-07)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added support for different Gymnasium spaces (``Box``, ``Discrete``, ``MultiDiscrete``, ``Tuple`` and ``Dict``)
to define observation, action and state spaces in the direct workflow.
* Added the :meth:`sample_space` utility to environment utils to sample supported spaces where the data containers are torch tensors (a usage sketch follows below).
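A minimal usage sketch of the new utility, mirroring the call made later in this diff in ``DirectMARLEnv._configure_env_spaces`` (the positional ``device`` argument and keyword names follow that call; the returned container is assumed to be a torch tensor batched over environments):

```python
import gymnasium as gym

from omni.isaac.lab.envs.utils import sample_space  # import path used in this PR

# Zero-filled, batched "sample" of a Box space: assumed to yield a torch tensor
# of shape (4096, 2) on the requested device.
action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))
zero_actions = sample_space(action_space, "cuda:0", batch_size=4096, fill_value=0)
```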

Changed
^^^^^^^

* Marked the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` attributes in :class:`DirectRLEnvCfg` as deprecated
  in favor of :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively (see the mapping sketched below).
* Marked the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` attributes in :class:`DirectMARLEnvCfg` as deprecated
  in favor of :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively.
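Concretely, the deprecated integer attributes map one-to-one onto the new space attributes; a sketch of the equivalence (assuming plain ints keep the previous unbounded ``Box`` behavior):

```python
# Deprecated (DirectRLEnvCfg)        Replacement
# num_actions = 1               ->   action_space = 1        # Box(-inf, inf, (1,))
# num_observations = 4          ->   observation_space = 4   # Box(-inf, inf, (4,))
# num_states = 0                ->   state_space = 0         # no state (state() is None)
```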


0.24.19 (2024-10-05)
~~~~~~~~~~~~~~~~~~~~

@@ -5,6 +5,7 @@

from __future__ import annotations

+ import gymnasium as gym
import torch
from typing import Dict, Literal, TypeVar

@@ -62,6 +63,9 @@ class ViewerCfg:
# Types.
##

+ SpaceType = TypeVar("SpaceType", gym.spaces.Space, int, set, tuple, list, dict)
+ """A sentinel object to indicate a valid space type to specify states, observations and actions."""

VecEnvObs = Dict[str, torch.Tensor | Dict[str, torch.Tensor]]
"""Observation returned by the environment.

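These are the specification types consumed by the ``spec_to_gym_space`` utility imported later in this diff. Its implementation is not shown here, so the following is only a hedged sketch of the conversion it plausibly performs for the most common specs:

```python
import gymnasium as gym
import numpy as np


def spec_to_gym_space(spec) -> gym.spaces.Space:
    """Convert a space specification into a Gymnasium space (assumed behavior).

    Set- and tuple-valued specs from ``SpaceType`` are omitted in this sketch.
    """
    if isinstance(spec, gym.spaces.Space):  # already a space: pass through
        return spec
    if isinstance(spec, int):  # n -> unbounded Box of shape (n,)
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,))
    if isinstance(spec, list):  # shape list -> unbounded Box of that shape
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=tuple(spec))
    if isinstance(spec, dict):  # nested specs -> Dict space
        return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()})
    raise ValueError(f"Unsupported space specification: {spec}")
```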
@@ -14,6 +14,7 @@
import weakref
from abc import abstractmethod
from collections.abc import Sequence
+ from dataclasses import MISSING
from typing import Any, ClassVar

import carb
@@ -164,10 +165,6 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar
# -- init buffers
self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long)
self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device)
- self.actions = {
- agent: torch.zeros(self.num_envs, self.cfg.num_actions[agent], device=self.sim.device)
- for agent in self.cfg.possible_agents
- }

# setup the observation, state and action spaces
self._configure_env_spaces()
@@ -406,16 +403,19 @@ def state(self) -> StateType | None:
"""Returns the state for the environment.

The state-space is used for centralized training or asymmetric actor-critic architectures. It is configured
- using the :attr:`DirectMARLEnvCfg.num_states` parameter.
+ using the :attr:`DirectMARLEnvCfg.state_space` parameter.

Returns:
- The states for the environment, or None if :attr:`DirectMARLEnvCfg.num_states` parameter is zero.
+ The states for the environment, or None if :attr:`DirectMARLEnvCfg.state_space` parameter is zero.
"""
- if not self.cfg.num_states:
+ if not self.cfg.state_space:
return None
# concatenate and return the observations as state
- if self.cfg.num_states < 0:
- self.state_buf = torch.cat([self.obs_dict[agent] for agent in self.cfg.possible_agents], dim=-1)
+ # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces
+ if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+ self.state_buf = torch.cat(
+ [self.obs_dict[agent].reshape(self.num_envs, -1) for agent in self.cfg.possible_agents], dim=-1
+ )
# compute and return custom environment state
else:
self.state_buf = self._get_states()
@@ -564,29 +564,52 @@ def set_debug_vis(self, debug_vis: bool) -> bool:
"""

def _configure_env_spaces(self):
"""Configure the spaces for the environment."""
+ # defer import to avoid circular import error
+ from omni.isaac.lab.envs.utils import sample_space, spec_to_gym_space

self.agents = self.cfg.possible_agents
self.possible_agents = self.cfg.possible_agents

+ # show deprecation message and overwrite configuration
+ if self.cfg.num_actions is not None:
+ carb.log_warn("DirectMARLEnvCfg.num_actions is deprecated. Use DirectMARLEnvCfg.action_spaces instead.")
+ if isinstance(self.cfg.action_spaces, type(MISSING)):
+ self.cfg.action_spaces = self.cfg.num_actions
+ if self.cfg.num_observations is not None:
+ carb.log_warn(
+ "DirectMARLEnvCfg.num_observations is deprecated. Use DirectMARLEnvCfg.observation_spaces instead."
+ )
+ if isinstance(self.cfg.observation_spaces, type(MISSING)):
+ self.cfg.observation_spaces = self.cfg.num_observations
+ if self.cfg.num_states is not None:
+ carb.log_warn("DirectMARLEnvCfg.num_states is deprecated. Use DirectMARLEnvCfg.state_space instead.")
+ if isinstance(self.cfg.state_space, type(MISSING)):
+ self.cfg.state_space = self.cfg.num_states

# set up observation and action spaces
self.observation_spaces = {
- agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_observations[agent],))
- for agent in self.cfg.possible_agents
+ agent: spec_to_gym_space(self.cfg.observation_spaces[agent]) for agent in self.cfg.possible_agents
}
self.action_spaces = {
- agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_actions[agent],))
- for agent in self.cfg.possible_agents
+ agent: spec_to_gym_space(self.cfg.action_spaces[agent]) for agent in self.cfg.possible_agents
}

# set up state space
- if not self.cfg.num_states:
+ if not self.cfg.state_space:
self.state_space = None
- if self.cfg.num_states < 0:
- self.state_space = gym.spaces.Box(
- low=-np.inf, high=np.inf, shape=(sum(self.cfg.num_observations.values()),)
+ if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0:
+ self.state_space = gym.spaces.flatten_space(
+ gym.spaces.Tuple([self.observation_spaces[agent] for agent in self.cfg.possible_agents])
)
else:
- self.state_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_states,))
+ self.state_space = spec_to_gym_space(self.cfg.state_space)

+ # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+ self.actions = {
+ agent: sample_space(self.action_spaces[agent], self.sim.device, batch_size=self.num_envs, fill_value=0)
+ for agent in self.cfg.possible_agents
+ }

def _reset_idx(self, env_ids: Sequence[int]):
"""Reset environments based on specified indices.
@@ -664,8 +687,8 @@ def _get_observations(self) -> dict[AgentID, ObsType]:
def _get_states(self) -> StateType:
"""Compute and return the states for the environment.

- This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.num_states`
- parameter is greater than zero.
+ This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.state_space`
+ parameter is not a number less than or equal to zero.

Returns:
The states for the environment.
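Putting the multi-agent pieces together, a hedged sketch of per-agent specs in a ``DirectMARLEnvCfg``-style config (agent names hypothetical; per the branch above, a negative ``state_space`` requests the flattened concatenation of all agents' observations, while ``0`` disables the state entirely):

```python
import gymnasium as gym

# Per-agent specs may mix the supported forms: ints, shape lists, or spaces.
possible_agents = ["robot_0", "robot_1"]
action_spaces = {"robot_0": 2, "robot_1": gym.spaces.Discrete(5)}
observation_spaces = {"robot_0": 8, "robot_1": [3, 64, 64]}
state_space = -1  # negative int -> state is the flattened concat of observations
```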