diff --git a/docs/source/features/hydra.rst b/docs/source/features/hydra.rst index 2fa5d2d706..89673dbe35 100644 --- a/docs/source/features/hydra.rst +++ b/docs/source/features/hydra.rst @@ -115,7 +115,7 @@ For example, for the configuration of the Cartpole camera depth environment: :emphasize-lines: 16 If the user were to modify the width of the camera, i.e. ``env.tiled_camera.width=128``, then the parameter -``env.num_observations=10240`` (1*80*128) must be updated and given as input as well. +``env.observation_space=[80,128,1]`` must be updated and given as input as well. Similarly, the ``__post_init__`` method is not updated with the command line inputs. In the ``LocomotionVelocityRoughEnvCfg``, for example, the post init update is as follows: diff --git a/docs/source/migration/migrating_from_isaacgymenvs.rst b/docs/source/migration/migrating_from_isaacgymenvs.rst index c903d83c3d..2073854d32 100644 --- a/docs/source/migration/migrating_from_isaacgymenvs.rst +++ b/docs/source/migration/migrating_from_isaacgymenvs.rst @@ -45,9 +45,9 @@ Below is an example skeleton of a task config class: # env decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 # task-specific parameters ... @@ -135,9 +135,9 @@ The following parameters must be set for each environment config: decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 Note that the maximum episode length parameter (now ``episode_length_s``) is in seconds instead of steps as it was in IsaacGymEnvs. To convert between step count to seconds, use the equation: @@ -569,9 +569,9 @@ Task Config | | decimation = 2 | | asset: | episode_length_s = 5.0 | | assetRoot: "../../assets" | action_scale = 100.0 # [N] | -| assetFileName: "urdf/cartpole.urdf" | num_actions = 1 | -| | num_observations = 4 | -| enableCameraSensors: False | num_states = 0 | +| assetFileName: "urdf/cartpole.urdf" | action_space = 1 | +| | observation_space = 4 | +| enableCameraSensors: False | state_space = 0 | | | # reset | | sim: | max_cart_pos = 3.0 | | dt: 0.0166 # 1/60 s | initial_pole_angle_range = [-0.25, 0.25] | diff --git a/docs/source/migration/migrating_from_omniisaacgymenvs.rst b/docs/source/migration/migrating_from_omniisaacgymenvs.rst index 50f9d5b9d6..cbda1e8d45 100644 --- a/docs/source/migration/migrating_from_omniisaacgymenvs.rst +++ b/docs/source/migration/migrating_from_omniisaacgymenvs.rst @@ -46,9 +46,9 @@ Below is an example skeleton of a task config class: # env decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 # task-specific parameters ... 
@@ -158,9 +158,9 @@ The following parameters must be set for each environment config: decimation = 2 episode_length_s = 5.0 - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 RL Config Setup @@ -501,9 +501,9 @@ Task config in Isaac Lab can be split into the main task configuration class and | clipObservations: 5.0 | decimation = 2 | | clipActions: 1.0 | episode_length_s = 5.0 | | controlFrequencyInv: 2 # 60 Hz | action_scale = 100.0 # [N] | -| | num_actions = 1 | -| sim: | num_observations = 4 | -| | num_states = 0 | +| | action_space = 1 | +| sim: | observation_space = 4 | +| | state_space = 0 | | dt: 0.0083 # 1/120 s | # reset | | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} | max_cart_pos = 3.0 | | gravity: [0.0, 0.0, -9.81] | initial_pole_angle_range = [-0.25, 0.25] | diff --git a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py index 8073886840..215ef3bd4f 100644 --- a/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py +++ b/docs/source/refs/snippets/tutorial_modify_direct_rl_env.py @@ -28,8 +28,8 @@ # [end-h1_env-import] # [start-h1_env-spaces] -num_actions = 19 -num_observations = 69 +action_space = 19 +observation_space = 69 # [end-h1_env-spaces] # [start-h1_env-robot] diff --git a/docs/source/tutorials/03_envs/create_direct_rl_env.rst b/docs/source/tutorials/03_envs/create_direct_rl_env.rst index ab5136106a..a4b945be9d 100644 --- a/docs/source/tutorials/03_envs/create_direct_rl_env.rst +++ b/docs/source/tutorials/03_envs/create_direct_rl_env.rst @@ -48,9 +48,9 @@ config should define the number of actions and observations for the environment. @configclass class CartpoleEnvCfg(DirectRLEnvCfg): ... - num_actions = 1 - num_observations = 4 - num_states = 0 + action_space = 1 + observation_space = 4 + state_space = 0 The config class can also be used to define task-specific attributes, such as scaling for reward terms and thresholds for reset conditions. diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml index 517ac00610..a633e64fb8 100644 --- a/source/extensions/omni.isaac.lab/config/extension.toml +++ b/source/extensions/omni.isaac.lab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.25.1" +version = "0.25.2" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst index e78abdb068..12e4b5b45a 100644 --- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst @@ -1,6 +1,25 @@ Changelog --------- +0.25.2 (2024-10-16) +~~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added support for different Gymnasium spaces (``Box``, ``Discrete``, ``MultiDiscrete``, ``Tuple`` and ``Dict``) + to define observation, action and state spaces in the direct workflow. +* Added :meth:`sample_space` to environment utils to sample supported spaces where data containers are torch tensors. + +Changed +^^^^^^^ + +* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectRLEnvCfg` as deprecated + in favor of :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively. 
+* Mark the :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in :class:`DirectMARLEnvCfg` as deprecated + in favor of :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively. + + 0.25.1 (2024-10-10) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py index 97701e50cc..667c0b5049 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/__init__.py @@ -52,4 +52,4 @@ from .manager_based_env_cfg import ManagerBasedEnvCfg from .manager_based_rl_env import ManagerBasedRLEnv from .manager_based_rl_env_cfg import ManagerBasedRLEnvCfg -from .utils import multi_agent_to_single_agent, multi_agent_with_one_agent +from .utils.marl import multi_agent_to_single_agent, multi_agent_with_one_agent diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py index d0c99f8ad8..7d6b02d309 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/common.py @@ -5,6 +5,7 @@ from __future__ import annotations +import gymnasium as gym import torch from typing import Dict, Literal, TypeVar @@ -62,6 +63,9 @@ class ViewerCfg: # Types. ## +SpaceType = TypeVar("SpaceType", gym.spaces.Space, int, set, tuple, list, dict) +"""A sentinel object to indicate a valid space type to specify states, observations and actions.""" + VecEnvObs = Dict[str, torch.Tensor | Dict[str, torch.Tensor]] """Observation returned by the environment. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py index f58a80dd47..5bcedb0591 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env.py @@ -14,6 +14,7 @@ import weakref from abc import abstractmethod from collections.abc import Sequence +from dataclasses import MISSING from typing import Any, ClassVar import omni.isaac.core.utils.torch as torch_utils @@ -30,6 +31,7 @@ from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType from .direct_marl_env_cfg import DirectMARLEnvCfg from .ui import ViewportCameraController +from .utils.spaces import sample_space, spec_to_gym_space class DirectMARLEnv: @@ -164,10 +166,6 @@ def __init__(self, cfg: DirectMARLEnvCfg, render_mode: str | None = None, **kwar # -- init buffers self.episode_length_buf = torch.zeros(self.num_envs, device=self.device, dtype=torch.long) self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) - self.actions = { - agent: torch.zeros(self.num_envs, self.cfg.num_actions[agent], device=self.sim.device) - for agent in self.cfg.possible_agents - } # setup the observation, state and action spaces self._configure_env_spaces() @@ -406,16 +404,19 @@ def state(self) -> StateType | None: """Returns the state for the environment. The state-space is used for centralized training or asymmetric actor-critic architectures. It is configured - using the :attr:`DirectMARLEnvCfg.num_states` parameter. + using the :attr:`DirectMARLEnvCfg.state_space` parameter. Returns: - The states for the environment, or None if :attr:`DirectMARLEnvCfg.num_states` parameter is zero. 
+ The states for the environment, or None if :attr:`DirectMARLEnvCfg.state_space` parameter is zero. """ - if not self.cfg.num_states: + if not self.cfg.state_space: return None # concatenate and return the observations as state - if self.cfg.num_states < 0: - self.state_buf = torch.cat([self.obs_dict[agent] for agent in self.cfg.possible_agents], dim=-1) + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces + if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0: + self.state_buf = torch.cat( + [self.obs_dict[agent].reshape(self.num_envs, -1) for agent in self.cfg.possible_agents], dim=-1 + ) # compute and return custom environment state else: self.state_buf = self._get_states() @@ -568,25 +569,45 @@ def _configure_env_spaces(self): self.agents = self.cfg.possible_agents self.possible_agents = self.cfg.possible_agents + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + omni.log.warn("DirectMARLEnvCfg.num_actions is deprecated. Use DirectMARLEnvCfg.action_spaces instead.") + if isinstance(self.cfg.action_spaces, type(MISSING)): + self.cfg.action_spaces = self.cfg.num_actions + if self.cfg.num_observations is not None: + omni.log.warn( + "DirectMARLEnvCfg.num_observations is deprecated. Use DirectMARLEnvCfg.observation_spaces instead." + ) + if isinstance(self.cfg.observation_spaces, type(MISSING)): + self.cfg.observation_spaces = self.cfg.num_observations + if self.cfg.num_states is not None: + omni.log.warn("DirectMARLEnvCfg.num_states is deprecated. Use DirectMARLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states + # set up observation and action spaces self.observation_spaces = { - agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_observations[agent],)) - for agent in self.cfg.possible_agents + agent: spec_to_gym_space(self.cfg.observation_spaces[agent]) for agent in self.cfg.possible_agents } self.action_spaces = { - agent: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_actions[agent],)) - for agent in self.cfg.possible_agents + agent: spec_to_gym_space(self.cfg.action_spaces[agent]) for agent in self.cfg.possible_agents } # set up state space - if not self.cfg.num_states: + if not self.cfg.state_space: self.state_space = None - if self.cfg.num_states < 0: - self.state_space = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.cfg.num_observations.values()),) + if isinstance(self.cfg.state_space, int) and self.cfg.state_space < 0: + self.state_space = gym.spaces.flatten_space( + gym.spaces.Tuple([self.observation_spaces[agent] for agent in self.cfg.possible_agents]) ) else: - self.state_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cfg.num_states,)) + self.state_space = spec_to_gym_space(self.cfg.state_space) + + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = { + agent: sample_space(self.action_spaces[agent], self.sim.device, batch_size=self.num_envs, fill_value=0) + for agent in self.cfg.possible_agents + } def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. @@ -664,8 +685,8 @@ def _get_observations(self) -> dict[AgentID, ObsType]: def _get_states(self) -> StateType: """Compute and return the states for the environment. 
- This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.num_states` - parameter is greater than zero. + This method is only called (and therefore has to be implemented) when the :attr:`DirectMARLEnvCfg.state_space` + parameter is not a number less than or equal to zero. Returns: The states for the environment. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py index 3dcf364f5c..40ecb64297 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_marl_env_cfg.py @@ -10,7 +10,7 @@ from omni.isaac.lab.utils import configclass from omni.isaac.lab.utils.noise import NoiseModelCfg -from .common import AgentID, ViewerCfg +from .common import AgentID, SpaceType, ViewerCfg from .ui import BaseEnvWindow @@ -104,11 +104,39 @@ class DirectMARLEnvCfg: Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details. """ - num_observations: dict[AgentID, int] = MISSING - """The dimension of the observation space from each agent.""" + observation_spaces: dict[AgentID, SpaceType] = MISSING + """Observation space definition for each agent. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ - num_states: int = MISSING - """The dimension of the state space from each environment instance. + num_observations: dict[AgentID, int] | None = None + """The dimension of the observation space for each agent. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.observation_spaces` instead. + """ + + state_space: SpaceType = MISSING + """State space definition. The following values are supported: @@ -116,6 +144,33 @@ class DirectMARLEnvCfg: * 0: No state-space will be constructed (`state_space` is None). This is useful to save computational resources when the algorithm to be trained does not need it. * greater than 0: Custom state-space dimension to be provided by the task implementation. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. 
list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_states: int | None = None + """The dimension of the state space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.state_space` instead. """ observation_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None @@ -124,8 +179,36 @@ class DirectMARLEnvCfg: Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details. """ - num_actions: dict[AgentID, int] = MISSING - """The dimension of the action space for each agent.""" + action_spaces: dict[AgentID, SpaceType] = MISSING + """Action space definition for each agent. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_actions: dict[AgentID, int] | None = None + """The dimension of the action space for each agent. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectMARLEnvCfg.action_spaces` instead. + """ action_noise_model: dict[AgentID, NoiseModelCfg | None] | None = None """The noise model applied to the actions provided to the environment. Default is None, which means no noise is added. 
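The same space-specification convention is shared by the single-agent and multi-agent configs: ``DirectRLEnvCfg`` takes one specification per attribute, while ``DirectMARLEnvCfg`` takes one specification per agent. A minimal sketch of the supported Python shorthand follows; the config class name and field values are illustrative only, and the other required fields (e.g. ``sim``, ``scene``, ``decimation``, ``episode_length_s``) are omitted:

    from omni.isaac.lab.envs import DirectRLEnvCfg
    from omni.isaac.lab.utils import configclass


    @configclass
    class ExampleSpacesEnvCfg(DirectRLEnvCfg):
        # integer -> unbounded Box space, i.e. gym.spaces.Box(low=-inf, high=inf, shape=(7,))
        action_space = 7
        # dict -> gym.spaces.Dict; list of ints -> Box, single-element set -> Discrete,
        # list of single-element sets -> MultiDiscrete
        observation_space = {"rgb": [64, 64, 3], "gripper": {2}, "mode": [{2}, {5}]}
        # 0 -> no state space is constructed (no asymmetric actor-critic)
        state_space = 0
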
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py index 5663977fda..2a0e88cb63 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py @@ -14,6 +14,7 @@ import weakref from abc import abstractmethod from collections.abc import Sequence +from dataclasses import MISSING from typing import Any, ClassVar import omni.isaac.core.utils.torch as torch_utils @@ -30,6 +31,7 @@ from .common import VecEnvObs, VecEnvStepReturn from .direct_rl_env_cfg import DirectRLEnvCfg from .ui import ViewportCameraController +from .utils.spaces import sample_space, spec_to_gym_space class DirectRLEnv(gym.Env): @@ -171,7 +173,6 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs self.reset_terminated = torch.zeros(self.num_envs, device=self.device, dtype=torch.bool) self.reset_time_outs = torch.zeros_like(self.reset_terminated) self.reset_buf = torch.zeros(self.num_envs, dtype=torch.bool, device=self.sim.device) - self.actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.sim.device) # setup the action and observation spaces for Gym self._configure_gym_env_spaces() @@ -507,27 +508,40 @@ def set_debug_vis(self, debug_vis: bool) -> bool: def _configure_gym_env_spaces(self): """Configure the action and observation spaces for the Gym environment.""" - # observation space (unbounded since we don't impose any limits) - self.num_actions = self.cfg.num_actions - self.num_observations = self.cfg.num_observations - self.num_states = self.cfg.num_states + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + omni.log.warn("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.") + if isinstance(self.cfg.action_space, type(MISSING)): + self.cfg.action_space = self.cfg.num_actions + if self.cfg.num_observations is not None: + omni.log.warn( + "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead." + ) + if isinstance(self.cfg.observation_space, type(MISSING)): + self.cfg.observation_space = self.cfg.num_observations + if self.cfg.num_states is not None: + omni.log.warn("DirectRLEnvCfg.num_states is deprecated. 
Use DirectRLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states # set up spaces self.single_observation_space = gym.spaces.Dict() - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(self.num_observations,) - ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) + self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space) + self.single_action_space = spec_to_gym_space(self.cfg.action_space) # batch the spaces for vectorized environments self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs) self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs) # optional state space for asymmetric actor-critic architectures - if self.num_states > 0: - self.single_observation_space["critic"] = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_states,)) + self.state_space = None + if self.cfg.state_space > 0: + self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space) self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs) + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0) + def _reset_idx(self, env_ids: Sequence[int]): """Reset environments based on specified indices. @@ -601,7 +615,7 @@ def _get_states(self) -> VecEnvObs | None: """Compute and return the states for the environment. The state-space is used for asymmetric actor-critic architectures. It is configured - using the :attr:`DirectRLEnvCfg.num_states` parameter. + using the :attr:`DirectRLEnvCfg.state_space` parameter. Returns: The states for the environment. If the environment does not have a state-space, the function diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py index ad8c6c18c8..e86b366cc2 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py @@ -10,7 +10,7 @@ from omni.isaac.lab.utils import configclass from omni.isaac.lab.utils.noise import NoiseModelCfg -from .common import ViewerCfg +from .common import SpaceType, ViewerCfg from .ui import BaseEnvWindow @@ -104,13 +104,68 @@ class DirectRLEnvCfg: Please refer to the :class:`omni.isaac.lab.managers.EventManager` class for more details. """ - num_observations: int = MISSING - """The dimension of the observation space from each environment instance.""" + observation_space: SpaceType = MISSING + """Observation space definition. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. 
list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_observations: int | None = None + """The dimension of the observation space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.observation_space` instead. + """ - num_states: int = 0 - """The dimension of the state-space from each environment instance. Default is 0, which means no state-space is defined. + state_space: SpaceType = MISSING + """State space definition. This is useful for asymmetric actor-critic and defines the observation space for the critic. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_states: int | None = None + """The dimension of the state-space from each environment instance. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.state_space` instead. """ observation_noise_model: NoiseModelCfg | None = None @@ -119,8 +174,36 @@ class DirectRLEnvCfg: Please refer to the :class:`omni.isaac.lab.utils.noise.NoiseModel` class for more details. """ - num_actions: int = MISSING - """The dimension of the action space for each environment.""" + action_space: SpaceType = MISSING + """Action space definition. + + The space can be defined either using Gymnasium :py:mod:`~gymnasium.spaces` (when a more detailed + specification of the space is desired) or basic Python data types (for simplicity). + + .. list-table:: + :header-rows: 1 + + * - Gymnasium space + - Python data type + * - :class:`~gymnasium.spaces.Box` + - Integer or list of integers (e.g.: ``7``, ``[64, 64, 3]``) + * - :class:`~gymnasium.spaces.Discrete` + - Single-element set (e.g.: ``{2}``) + * - :class:`~gymnasium.spaces.MultiDiscrete` + - List of single-element sets (e.g.: ``[{2}, {5}]``) + * - :class:`~gymnasium.spaces.Dict` + - Dictionary (e.g.: ``{"joints": 7, "rgb": [64, 64, 3], "gripper": {2}}``) + * - :class:`~gymnasium.spaces.Tuple` + - Tuple (e.g.: ``(7, [64, 64, 3], {2})``) + """ + + num_actions: int | None = None + """The dimension of the action space for each environment. + + .. warning:: + + This attribute is deprecated. Use :attr:`~omni.isaac.lab.envs.DirectRLEnvCfg.action_space` instead. + """ action_noise_model: NoiseModelCfg | None = None """The noise model applied to the actions provided to the environment. 
Default is None, which means no noise is added. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py new file mode 100644 index 0000000000..913e1edb90 --- /dev/null +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment utils.""" diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py similarity index 76% rename from source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py rename to source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py index cacbdeaf81..46519048ae 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/marl.py @@ -9,9 +9,9 @@ import torch from typing import Any -from .common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn -from .direct_marl_env import DirectMARLEnv -from .direct_rl_env import DirectRLEnv +from ..common import ActionType, AgentID, EnvStepReturn, ObsType, StateType, VecEnvObs, VecEnvStepReturn +from ..direct_marl_env import DirectMARLEnv +from ..direct_rl_env import DirectRLEnv def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = False) -> DirectRLEnv: @@ -39,7 +39,7 @@ def multi_agent_to_single_agent(env: DirectMARLEnv, state_as_observation: bool = Raises: AssertionError: If the environment state cannot be used as observation since it was explicitly defined - as unconstructed (:attr:`DirectMARLEnvCfg.num_states`). + as unconstructed (:attr:`DirectMARLEnvCfg.state_space`). 
""" class Env(DirectRLEnv): @@ -49,7 +49,7 @@ def __init__(self, env: DirectMARLEnv) -> None: # check if it is possible to use the multi-agent environment state as single-agent observation self._state_as_observation = state_as_observation if self._state_as_observation: - assert self.env.cfg.num_states != 0, ( + assert self.env.cfg.state_space != 0, ( "The environment state cannot be used as observation since it was explicitly defined as" " unconstructed" ) @@ -58,18 +58,17 @@ def __init__(self, env: DirectMARLEnv) -> None: self.cfg = self.env.cfg self.sim = self.env.sim self.scene = self.env.scene - self.num_actions = sum(self.env.cfg.num_actions.values()) - self.num_observations = sum(self.env.cfg.num_observations.values()) - self.num_states = self.env.cfg.num_states self.single_observation_space = gym.spaces.Dict() if self._state_as_observation: self.single_observation_space["policy"] = self.env.state_space else: - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(self.num_observations,) + self.single_observation_space["policy"] = gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents]) ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) + self.single_action_space = gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents]) + ) # batch the spaces for vectorized environments self.observation_space = gym.vector.utils.batch_space( @@ -84,18 +83,25 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) if self._state_as_observation: obs = {"policy": self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces else: - obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + "policy": torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } return obs, extras def step(self, action: torch.Tensor) -> VecEnvStepReturn: # split single-agent actions to build the multi-agent ones + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces index = 0 _actions = {} for agent in self.env.possible_agents: - _actions[agent] = action[:, index : index + self.env.cfg.num_actions[agent]] - index += self.env.cfg.num_actions[agent] + delta = gym.spaces.flatdim(self.env.action_spaces[agent]) + _actions[agent] = action[:, index : index + delta] + index += delta # step the environment obs, rewards, terminated, time_outs, extras = self.env.step(_actions) @@ -104,8 +110,13 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: if self._state_as_observation: obs = {"policy": self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. 
Fix it to support composite spaces else: - obs = {"policy": torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + "policy": torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } # process environment outputs to return single-agent data rewards = sum(rewards.values()) @@ -147,7 +158,7 @@ def multi_agent_with_one_agent(env: DirectMARLEnv, state_as_observation: bool = Raises: AssertionError: If the environment state cannot be used as observation since it was explicitly defined - as unconstructed (:attr:`DirectMARLEnvCfg.num_states`). + as unconstructed (:attr:`DirectMARLEnvCfg.state_space`). """ class Env(DirectMARLEnv): @@ -157,7 +168,7 @@ def __init__(self, env: DirectMARLEnv) -> None: # check if it is possible to use the multi-agent environment state as agent observation self._state_as_observation = state_as_observation if self._state_as_observation: - assert self.env.cfg.num_states != 0, ( + assert self.env.cfg.state_space != 0, ( "The environment state cannot be used as observation since it was explicitly defined as" " unconstructed" ) @@ -170,13 +181,13 @@ def __init__(self, env: DirectMARLEnv) -> None: self._exported_observation_spaces = {self._agent_id: self.env.state_space} else: self._exported_observation_spaces = { - self._agent_id: gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_observations.values()),) + self._agent_id: gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.observation_spaces[agent] for agent in self.env.possible_agents]) ) } self._exported_action_spaces = { - self._agent_id: gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(sum(self.env.cfg.num_actions.values()),) + self._agent_id: gym.spaces.flatten_space( + gym.spaces.Tuple([self.env.action_spaces[agent] for agent in self.env.possible_agents]) ) } @@ -208,18 +219,25 @@ def reset( if self._state_as_observation: obs = {self._agent_id: self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces else: - obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + self._agent_id: torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } return obs, extras def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn: # split agent actions to build the multi-agent ones + # FIXME: This implementation assumes the spaces are fundamental ones. Fix it to support composite spaces index = 0 _actions = {} for agent in self.env.possible_agents: - _actions[agent] = actions[self._agent_id][:, index : index + self.env.cfg.num_actions[agent]] - index += self.env.cfg.num_actions[agent] + delta = gym.spaces.flatdim(self.env.action_spaces[agent]) + _actions[agent] = actions[self._agent_id][:, index : index + delta] + index += delta # step the environment obs, rewards, terminated, time_outs, extras = self.env.step(_actions) @@ -228,8 +246,13 @@ def step(self, actions: dict[AgentID, ActionType]) -> EnvStepReturn: if self._state_as_observation: obs = {self._agent_id: self.env.state()} # concatenate agents' observations + # FIXME: This implementation assumes the spaces are fundamental ones. 
Fix it to support composite spaces else: - obs = {self._agent_id: torch.cat([obs[agent] for agent in self.env.possible_agents], dim=-1)} + obs = { + self._agent_id: torch.cat( + [obs[agent].reshape(self.num_envs, -1) for agent in self.env.possible_agents], dim=-1 + ) + } # process environment outputs to return agent data rewards = {self._agent_id: sum(rewards.values())} diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py new file mode 100644 index 0000000000..8604392ec6 --- /dev/null +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/utils/spaces.py @@ -0,0 +1,92 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym +import numpy as np +import torch +from typing import Any + +from ..common import SpaceType + + +def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space: + """Generate an appropriate Gymnasium space according to the given space specification. + + Args: + spec: Space specification. + + Returns: + Gymnasium space. + + Raises: + ValueError: If the given space specification is not valid/supported. + """ + if isinstance(spec, gym.spaces.Space): + return spec + # fundamental spaces + # Box + elif isinstance(spec, int): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,)) + elif isinstance(spec, list) and all(isinstance(x, int) for x in spec): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec) + # Discrete + elif isinstance(spec, set) and len(spec) == 1: + return gym.spaces.Discrete(n=next(iter(spec))) + # MultiDiscrete + elif isinstance(spec, list) and all(isinstance(x, set) and len(x) == 1 for x in spec): + return gym.spaces.MultiDiscrete(nvec=[next(iter(x)) for x in spec]) + # composite spaces + # Tuple + elif isinstance(spec, tuple): + return gym.spaces.Tuple([spec_to_gym_space(x) for x in spec]) + # Dict + elif isinstance(spec, dict): + return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()}) + raise ValueError(f"Unsupported space specification: {spec}") + + +def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any: + """Sample a Gymnasium space where the data container are PyTorch tensors. + + Args: + space: Gymnasium space. + device: The device where the tensor should be created. + batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it. + fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values. + + Returns: + Tensorized sampled space. 
+ """ + + def tensorize(s, x): + if isinstance(s, gym.spaces.Box): + tensor = torch.tensor(x, device=device, dtype=torch.float32).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(fill_value) + return tensor + elif isinstance(s, gym.spaces.Discrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(x, np.number) or type(x) in [int, float]: + tensor = torch.tensor([x], device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.MultiDiscrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.Dict): + return {k: tensorize(_s, x[k]) for k, _s in s.items()} + elif isinstance(s, gym.spaces.Tuple): + return tuple([tensorize(_s, v) for _s, v in zip(s, x)]) + + sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample() + return tensorize(space, sample) diff --git a/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py new file mode 100644 index 0000000000..274f0de650 --- /dev/null +++ b/source/extensions/omni.isaac.lab/test/envs/test_spaces_utils.py @@ -0,0 +1,122 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# ignore private usage of variables warning +# pyright: reportPrivateUsage=none + +from __future__ import annotations + +"""Launch Isaac Sim Simulator first.""" + +from omni.isaac.lab.app import AppLauncher, run_tests + +# Can set this to False to see the GUI for debugging +HEADLESS = True + +# launch omniverse app +app_launcher = AppLauncher(headless=HEADLESS) +simulation_app = app_launcher.app + +"""Rest everything follows.""" + +import numpy as np +import torch +import unittest +from gymnasium.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple + +from omni.isaac.lab.envs.utils.spaces import sample_space, spec_to_gym_space + + +class TestSpacesUtils(unittest.TestCase): + """Test for spaces utils' functions""" + + """ + Tests + """ + + def test_spec_to_gym_space(self): + # fundamental spaces + # Box + space = spec_to_gym_space(1) + self.assertIsInstance(space, Box) + self.assertEqual(space.shape, (1,)) + space = spec_to_gym_space([1, 2, 3, 4, 5]) + self.assertIsInstance(space, Box) + self.assertEqual(space.shape, (1, 2, 3, 4, 5)) + space = spec_to_gym_space(Box(low=-1.0, high=1.0, shape=(1, 2))) + self.assertIsInstance(space, Box) + # Discrete + space = spec_to_gym_space({2}) + self.assertIsInstance(space, Discrete) + self.assertEqual(space.n, 2) + space = spec_to_gym_space(Discrete(2)) + self.assertIsInstance(space, Discrete) + # MultiDiscrete + space = spec_to_gym_space([{1}, {2}, {3}]) + self.assertIsInstance(space, MultiDiscrete) + self.assertEqual(space.nvec.shape, (3,)) + space = spec_to_gym_space(MultiDiscrete(np.array([1, 2, 3]))) + self.assertIsInstance(space, MultiDiscrete) + # composite spaces + # Tuple + space = spec_to_gym_space(([1, 2, 3, 4, 5], {2}, [{1}, {2}, {3}])) + self.assertIsInstance(space, Tuple) + self.assertEqual(len(space), 3) + self.assertIsInstance(space[0], Box) + 
self.assertIsInstance(space[1], Discrete) + self.assertIsInstance(space[2], MultiDiscrete) + space = spec_to_gym_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2)))) + self.assertIsInstance(space, Tuple) + # Dict + space = spec_to_gym_space({"box": [1, 2, 3, 4, 5], "discrete": {2}, "multi_discrete": [{1}, {2}, {3}]}) + self.assertIsInstance(space, Dict) + self.assertEqual(len(space), 3) + self.assertIsInstance(space["box"], Box) + self.assertIsInstance(space["discrete"], Discrete) + self.assertIsInstance(space["multi_discrete"], MultiDiscrete) + space = spec_to_gym_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)})) + self.assertIsInstance(space, Dict) + + def test_sample_space(self): + device = "cpu" + # fundamental spaces + # Box + sample = sample_space(Box(low=-1.0, high=1.0, shape=(1, 2)), device, batch_size=1) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=1) + # Discrete + sample = sample_space(Discrete(2), device, batch_size=2) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=2) + # MultiDiscrete + sample = sample_space(MultiDiscrete(np.array([1, 2, 3])), device, batch_size=3) + self.assertIsInstance(sample, torch.Tensor) + self._check_tensorized(sample, batch_size=3) + # composite spaces + # Tuple + sample = sample_space(Tuple((Box(-1, 1, shape=(1,)), Discrete(2))), device, batch_size=4) + self.assertIsInstance(sample, (tuple, list)) + self._check_tensorized(sample, batch_size=4) + # Dict + sample = sample_space(Dict({"box": Box(-1, 1, shape=(1,)), "discrete": Discrete(2)}), device, batch_size=5) + self.assertIsInstance(sample, dict) + self._check_tensorized(sample, batch_size=5) + + """ + Helper functions. + """ + + def _check_tensorized(self, sample, batch_size): + if isinstance(sample, (tuple, list)): + list(map(self._check_tensorized, sample, [batch_size] * len(sample))) + elif isinstance(sample, dict): + list(map(self._check_tensorized, sample.values(), [batch_size] * len(sample))) + else: + self.assertIsInstance(sample, torch.Tensor) + self.assertEqual(sample.shape[0], batch_size) + + +if __name__ == "__main__": + run_tests() diff --git a/source/extensions/omni.isaac.lab_tasks/config/extension.toml b/source/extensions/omni.isaac.lab_tasks/config/extension.toml index 89ca646936..a6ecb7a56c 100644 --- a/source/extensions/omni.isaac.lab_tasks/config/extension.toml +++ b/source/extensions/omni.isaac.lab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.10.5" +version = "0.10.7" # Description title = "Isaac Lab Environments" diff --git a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst index 2614630bd7..b3ba0a77fd 100644 --- a/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab_tasks/docs/CHANGELOG.rst @@ -1,11 +1,24 @@ Changelog --------- +0.10.7 (2024-10-02) +~~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Replace deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in single-agent direct tasks + by :attr:`observation_space`, :attr:`action_space` and :attr:`state_space` respectively. +* Replace deprecated :attr:`num_observations`, :attr:`num_actions` and :attr:`num_states` in multi-agent direct tasks + by :attr:`observation_spaces`, :attr:`action_spaces` and :attr:`state_space` respectively. 
+ + 0.10.6 (2024-09-25) ~~~~~~~~~~~~~~~~~~~ Added ^^^^^ + * Added ``Isaac-Cartpole-RGB-Camera-v0`` and ``Isaac-Cartpole-Depth-Camera-v0`` manager based camera cartpole environments. diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py index b83b6782a6..b5c53a91d3 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/allegro_hand/allegro_hand_env_cfg.py @@ -22,9 +22,9 @@ class AllegroHandEnvCfg(DirectRLEnvCfg): # env decimation = 4 episode_length_s = 10.0 - num_actions = 16 - num_observations = 124 # (full) - num_states = 0 + action_space = 16 + observation_space = 124 # (full) + state_space = 0 asymmetric_obs = False obs_type = "full" # simulation diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py index 8bf6d6bcc9..42f57127ee 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/ant/ant_env.py @@ -24,9 +24,9 @@ class AntEnvCfg(DirectRLEnvCfg): episode_length_s = 15.0 decimation = 2 action_scale = 0.5 - num_actions = 8 - num_observations = 36 - num_states = 0 + action_space = 8 + observation_space = 36 + state_space = 0 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py index 5490bb0dd3..ca1f61c54a 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/anymal_c/anymal_c_env.py @@ -5,6 +5,7 @@ from __future__ import annotations +import gymnasium as gym import torch import omni.isaac.lab.envs.mdp as mdp @@ -59,9 +60,9 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg): episode_length_s = 20.0 decimation = 4 action_scale = 0.5 - num_actions = 12 - num_observations = 48 - num_states = 0 + action_space = 12 + observation_space = 48 + state_space = 0 # simulation sim: SimulationCfg = SimulationCfg( @@ -118,7 +119,7 @@ class AnymalCFlatEnvCfg(DirectRLEnvCfg): @configclass class AnymalCRoughEnvCfg(AnymalCFlatEnvCfg): # env - num_observations = 235 + observation_space = 235 terrain = TerrainImporterCfg( prim_path="/World/ground", @@ -160,8 +161,10 @@ def __init__(self, cfg: AnymalCFlatEnvCfg | AnymalCRoughEnvCfg, render_mode: str super().__init__(cfg, render_mode, **kwargs) # Joint position command (deviation from default joint positions) - self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device) - self._previous_actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device) + self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device) + self._previous_actions = torch.zeros( + self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device + ) # X/Y linear velocity and yaw angular velocity commands self._commands = torch.zeros(self.num_envs, 3, device=self.device) diff --git 
a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py index 0b606fe899..ad8c616940 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cart_double_pendulum/cart_double_pendulum_env.py @@ -27,9 +27,9 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg): decimation = 2 episode_length_s = 5.0 possible_agents = ["cart", "pendulum"] - num_actions = {"cart": 1, "pendulum": 1} - num_observations = {"cart": 4, "pendulum": 3} - num_states = -1 + action_spaces = {"cart": 1, "pendulum": 1} + observation_spaces = {"cart": 4, "pendulum": 3} + state_space = -1 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py index b2a1b1e303..dc7db07030 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_camera_env.py @@ -5,9 +5,7 @@ from __future__ import annotations -import gymnasium as gym import math -import numpy as np import torch from collections.abc import Sequence @@ -29,9 +27,6 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg): decimation = 2 episode_length_s = 5.0 action_scale = 100.0 # [N] - num_actions = 1 - num_channels = 3 - num_states = 0 # simulation sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation) @@ -52,9 +47,13 @@ class CartpoleRGBCameraEnvCfg(DirectRLEnvCfg): width=80, height=80, ) - num_observations = num_channels * tiled_camera.height * tiled_camera.width write_image_to_file = False + # spaces + action_space = 1 + state_space = 0 + observation_space = [tiled_camera.height, tiled_camera.width, 3] + # change viewer settings viewer = ViewerCfg(eye=(20.0, 20.0, 20.0)) @@ -87,9 +86,8 @@ class CartpoleDepthCameraEnvCfg(CartpoleRGBCameraEnvCfg): height=80, ) - # env - num_channels = 1 - num_observations = num_channels * tiled_camera.height * tiled_camera.width + # spaces + observation_space = [tiled_camera.height, tiled_camera.width, 1] class CartpoleCameraEnv(DirectRLEnv): @@ -118,35 +116,6 @@ def close(self): """Cleanup for the environment.""" super().close() - def _configure_gym_env_spaces(self): - """Configure the action and observation spaces for the Gym environment.""" - # observation space (unbounded since we don't impose any limits) - self.num_actions = self.cfg.num_actions - self.num_observations = self.cfg.num_observations - self.num_states = self.cfg.num_states - - # set up spaces - self.single_observation_space = gym.spaces.Dict() - self.single_observation_space["policy"] = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels), - ) - if self.num_states > 0: - self.single_observation_space["critic"] = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(self.cfg.tiled_camera.height, self.cfg.tiled_camera.width, self.cfg.num_channels), - ) - self.single_action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_actions,)) - - # batch the spaces for 
vectorized environments
-        self.observation_space = gym.vector.utils.batch_space(self.single_observation_space, self.num_envs)
-        self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
-
-        # RL specifics
-        self.actions = torch.zeros(self.num_envs, self.num_actions, device=self.sim.device)
-
     def _setup_scene(self):
         """Setup the scene with the cartpole and camera."""
         self._cartpole = Articulation(self.cfg.robot_cfg)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
index 44926e95f9..534fb26443 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/cartpole/cartpole_env.py
@@ -27,9 +27,9 @@ class CartpoleEnvCfg(DirectRLEnvCfg):
     decimation = 2
     episode_length_s = 5.0
     action_scale = 100.0  # [N]
-    num_actions = 1
-    num_observations = 4
-    num_states = 0
+    action_space = 1
+    observation_space = 4
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
index 4eb01953fe..3a6a480ed0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/franka_cabinet/franka_cabinet_env.py
@@ -28,9 +28,9 @@ class FrankaCabinetEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 8.3333  # 500 timesteps
     decimation = 2
-    num_actions = 9
-    num_observations = 23
-    num_states = 0
+    action_space = 9
+    observation_space = 23
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
index bfaf8f8190..2a4d330e6a 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
@@ -24,9 +24,9 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
     episode_length_s = 15.0
     decimation = 2
     action_scale = 1.0
-    num_actions = 21
-    num_observations = 75
-    num_states = 0
+    action_space = 21
+    observation_space = 75
+    state_space = 0

     # simulation
     sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation)
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
index c6df659ec6..97156618f1 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/quadcopter/quadcopter_env.py
@@ -5,6 +5,7 @@

 from __future__ import annotations

+import gymnasium as gym
 import torch

 import omni.isaac.lab.sim as sim_utils
@@ -50,9 +51,9 @@ class QuadcopterEnvCfg(DirectRLEnvCfg):
     # env
     episode_length_s = 10.0
     decimation = 2
-    num_actions = 4
-    num_observations = 12
-    num_states = 0
+    action_space = 4
+    observation_space = 12
+    state_space = 0
     debug_vis = True

     ui_window_class_type = QuadcopterEnvWindow
@@ -105,7 +106,7 @@ def __init__(self, cfg: QuadcopterEnvCfg, render_mode: str | None = None, **kwar
         super().__init__(cfg, render_mode, **kwargs)

         # Total thrust and moment applied to the base of the quadcopter
-        self._actions = torch.zeros(self.num_envs, self.cfg.num_actions, device=self.device)
+        self._actions = torch.zeros(self.num_envs, gym.spaces.flatdim(self.single_action_space), device=self.device)
         self._thrust = torch.zeros(self.num_envs, 1, 3, device=self.device)
         self._moment = torch.zeros(self.num_envs, 1, 3, device=self.device)
         # Goal position
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
index f4b8407296..af88124792 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_env_cfg.py
@@ -119,9 +119,9 @@ class ShadowHandEnvCfg(DirectRLEnvCfg):
     # env
     decimation = 2
     episode_length_s = 10.0
-    num_actions = 20
-    num_observations = 157  # (full)
-    num_states = 0
+    action_space = 20
+    observation_space = 157  # (full)
+    state_space = 0
     asymmetric_obs = False
     obs_type = "full"

@@ -232,9 +232,9 @@ class ShadowHandOpenAIEnvCfg(ShadowHandEnvCfg):
     # env
     decimation = 3
     episode_length_s = 8.0
-    num_actions = 20
-    num_observations = 42
-    num_states = 187
+    action_space = 20
+    observation_space = 42
+    state_space = 187
     asymmetric_obs = True
     obs_type = "openai"
     # simulation
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
index b025bfb052..492074d8a9 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
@@ -48,8 +48,8 @@ class ShadowHandVisionEnvCfg(ShadowHandEnvCfg):
     feature_extractor = FeatureExtractorCfg()

     # env
-    num_observations = 164 + 27  # state observation + vision CNN embedding
-    num_states = 187 + 27  # asymettric states + vision CNN embedding
+    observation_space = 164 + 27  # state observation + vision CNN embedding
+    state_space = 187 + 27  # asymettric states + vision CNN embedding


 @configclass
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
index d6dbb3d6a2..d3a7c33b3f 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/shadow_hand_over/shadow_hand_over_env_cfg.py
@@ -118,9 +118,9 @@ class ShadowHandOverEnvCfg(DirectMARLEnvCfg):
     decimation = 2
     episode_length_s = 7.5
     possible_agents = ["right_hand", "left_hand"]
-    num_actions = {"right_hand": 20, "left_hand": 20}
-    num_observations = {"right_hand": 157, "left_hand": 157}
-    num_states = 290
+    action_spaces = {"right_hand": 20, "left_hand": 20}
+    observation_spaces = {"right_hand": 157, "left_hand": 157}
+    state_space = 290

     # simulation
     sim: SimulationCfg = SimulationCfg(
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
index 0badd08c31..0dedef9ef0 100644
--- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
+++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/utils/wrappers/rsl_rl/vecenv_wrapper.py
@@ -70,19 +70,19 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv):
         if hasattr(self.unwrapped, "action_manager"):
             self.num_actions = self.unwrapped.action_manager.total_action_dim
         else:
-            self.num_actions = self.unwrapped.num_actions
+            self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space)
         if hasattr(self.unwrapped, "observation_manager"):
             self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0]
         else:
-            self.num_obs = self.unwrapped.num_observations
+            self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"])
         # -- privileged observations
         if (
             hasattr(self.unwrapped, "observation_manager")
             and "critic" in self.unwrapped.observation_manager.group_obs_dim
         ):
             self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0]
-        elif hasattr(self.unwrapped, "num_states"):
-            self.num_privileged_obs = self.unwrapped.num_states
+        elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space:
+            self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"])
         else:
             self.num_privileged_obs = 0
         # reset at the start since the RSL-RL runner does not call reset
diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
index 9e92e26156..993b776a81 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py
@@ -22,6 +22,7 @@
 import omni.usd

 from omni.isaac.lab.envs import ManagerBasedRLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space

 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -108,12 +109,12 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
-                actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1
+                # sample actions according to the defined space
+                actions = sample_space(env.single_action_space, device=env.unwrapped.device, batch_size=num_envs)
                 # apply actions
                 transition = env.step(actions)
                 # check signals
-                for data in transition:
+                for data in transition[:-1]:  # exclude info
                     self.assertTrue(self._check_valid_tensor(data), msg=f"Invalid data: {data}")

         # close the environment
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")

diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
index 19fcd88936..2f543a84e3 100644
--- a/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
+++ b/source/extensions/omni.isaac.lab_tasks/test/test_multi_agent_environments.py
@@ -21,6 +21,7 @@
 import omni.usd

 from omni.isaac.lab.envs import DirectMARLEnv, DirectMARLEnvCfg
+from omni.isaac.lab.envs.utils import sample_space

 import omni.isaac.lab_tasks  # noqa: F401
 from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg
@@ -104,9 +105,9 @@ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_
         # simulate environment for num_steps steps
         with torch.inference_mode():
             for _ in range(num_steps):
-                # sample actions from -1 to 1
+                # sample actions according to the defined space
                 actions = {
-                    agent: 2 * torch.rand(env.action_space(agent).shape, device=env.unwrapped.device) - 1
+                    agent: sample_space(env.action_spaces[agent], device=env.unwrapped.device)
                     for agent in env.unwrapped.possible_agents
                 }
                 # apply actions
@@ -131,14 +132,10 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool:
         """
         if isinstance(data, torch.Tensor):
             return not torch.any(torch.isnan(data))
+        elif isinstance(data, (tuple, list)):
+            return all(TestEnvironments._check_valid_tensor(value) for value in data)
         elif isinstance(data, dict):
-            valid_tensor = True
-            for value in data.values():
-                if isinstance(value, dict):
-                    valid_tensor &= TestEnvironments._check_valid_tensor(value)
-                elif isinstance(value, torch.Tensor):
-                    valid_tensor &= not torch.any(torch.isnan(value))
-            return valid_tensor
+            return all(TestEnvironments._check_valid_tensor(value) for value in data.values())
         else:
             raise ValueError(f"Input data of invalid type: {type(data)}.")