diff --git a/docs/index.rst b/docs/index.rst index d7f0ed2d9f..7d63040bae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -75,6 +75,7 @@ Table of Contents source/features/tiled_rendering source/features/environments source/features/actuators + source/features/reproducibility .. source/features/motion_generators .. toctree:: diff --git a/docs/source/features/environments.rst b/docs/source/features/environments.rst index 58af134004..7e9c142b63 100644 --- a/docs/source/features/environments.rst +++ b/docs/source/features/environments.rst @@ -24,16 +24,16 @@ Classic environments that are based on IsaacGymEnvs implementation of MuJoCo-sty +------------------+-----------------------------+-------------------------------------------------------------------------+ | World | Environment ID | Description | +==================+=============================+=========================================================================+ - | |humanoid| | | |humanoid-link| | Move towards a direction with the MuJoCo humanoid robot | - | | | |humanoid-direct-link| | | + | |humanoid| | |humanoid-link| | Move towards a direction with the MuJoCo humanoid robot | + | | |humanoid-direct-link| | | +------------------+-----------------------------+-------------------------------------------------------------------------+ - | |ant| | | |ant-link| | Move towards a direction with the MuJoCo ant robot | - | | | |ant-direct-link| | | + | |ant| | |ant-link| | Move towards a direction with the MuJoCo ant robot | + | | |ant-direct-link| | | +------------------+-----------------------------+-------------------------------------------------------------------------+ - | |cartpole| | | |cartpole-link| | Move the cart to keep the pole upwards in the classic cartpole control | - | | | |cartpole-direct-link| | | - | | | |cartpole-camera-rgb-link|| | - | | | |cartpole-camera-dpt-link|| | + | |cartpole| | |cartpole-link| | Move the cart to keep the pole upwards in the classic cartpole control | + | | |cartpole-direct-link| | | + | | |cartpole-camera-rgb-link| | | + | | |cartpole-camera-dpt-link| | | +------------------+-----------------------------+-------------------------------------------------------------------------+ .. |humanoid| image:: ../_static/tasks/classic/humanoid.jpg @@ -77,12 +77,12 @@ for the reach environment: +----------------+---------------------------+-----------------------------------------------------------------------------+ | |cabi-franka| | |cabi-franka-link| | Grasp the handle of a cabinet's drawer and open it with the Franka robot | +----------------+---------------------------+-----------------------------------------------------------------------------+ - | |cube-allegro| | | |cube-allegro-link| | In-hand reorientation of a cube using Allegro hand | - | | | |allegro-direct-link| | | + | |cube-allegro| | |cube-allegro-link| | In-hand reorientation of a cube using Allegro hand | + | | |allegro-direct-link| | | +----------------+---------------------------+-----------------------------------------------------------------------------+ - | |cube-shadow| | | |cube-shadow-link| | In-hand reorientation of a cube using Shadow hand | - | | | |cube-shadow-ff-link| | | - | | | |cube-shadow-lstm-link| | | + | |cube-shadow| | |cube-shadow-link| | In-hand reorientation of a cube using Shadow hand | + | | |cube-shadow-ff-link| | | + | | |cube-shadow-lstm-link| | | +----------------+---------------------------+-----------------------------------------------------------------------------+ .. 
|reach-franka| image:: ../_static/tasks/manipulation/franka_reach.jpg @@ -120,11 +120,11 @@ Environments based on legged locomotion tasks. +------------------------------+----------------------------------------------+------------------------------------------------------------------------------+ | |velocity-rough-anymal-b| | |velocity-rough-anymal-b-link| | Track a velocity command on rough terrain with the Anymal B robot | +------------------------------+----------------------------------------------+------------------------------------------------------------------------------+ - | |velocity-flat-anymal-c| | | |velocity-flat-anymal-c-link| | Track a velocity command on flat terrain with the Anymal C robot | - | | | |velocity-flat-anymal-c-direct-link| | | + | |velocity-flat-anymal-c| | |velocity-flat-anymal-c-link| | Track a velocity command on flat terrain with the Anymal C robot | + | | |velocity-flat-anymal-c-direct-link| | | +------------------------------+----------------------------------------------+------------------------------------------------------------------------------+ - | |velocity-rough-anymal-c| | | |velocity-rough-anymal-c-link| | Track a velocity command on rough terrain with the Anymal C robot | - | | | |velocity-rough-anymal-c-direct-link| | | + | |velocity-rough-anymal-c| | |velocity-rough-anymal-c-link| | Track a velocity command on rough terrain with the Anymal C robot | + | | |velocity-rough-anymal-c-direct-link| | | +------------------------------+----------------------------------------------+------------------------------------------------------------------------------+ | |velocity-flat-anymal-d| | |velocity-flat-anymal-d-link| | Track a velocity command on flat terrain with the Anymal D robot | +------------------------------+----------------------------------------------+------------------------------------------------------------------------------+ diff --git a/docs/source/features/reproducibility.rst b/docs/source/features/reproducibility.rst new file mode 100644 index 0000000000..5f77630d81 --- /dev/null +++ b/docs/source/features/reproducibility.rst @@ -0,0 +1,42 @@ +Reproducibility and Determinism +------------------------------- + +Given the same hardware and Isaac Sim (and consequently PhysX) version, the simulation produces +identical results for scenes with rigid bodies and articulations. However, the simulation results can +vary across different hardware configurations due to floating point precision and rounding errors. +At present, PhysX does not guarantee determinism for any scene with non-rigid bodies, such as cloth +or soft bodies. For more information, please refer to the `PhysX Determinism documentation`_. + +Based on the above, Isaac Lab provides a deterministic simulation that ensures consistent +results across different runs. This is achieved by using the same random seed for the +simulation environment and the physics engine. At construction of the environment, the random seed +is set to a fixed value using the :meth:`~omni.isaac.core.utils.torch.set_seed` method. This method sets the +random seed for both the CPU and GPU globally across different libraries, including PyTorch and +NumPy. + +In the included workflow scripts, the seed specified in the learning agent's configuration file or the +command line argument is used to set the random seed for the environment. This ensures that the +simulation results are reproducible across different runs.
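For example, a standalone script can fix the seed through the environment configuration before creating the environment. The following is a minimal sketch of this pattern; the task name is illustrative, and :func:`~omni.isaac.lab_tasks.utils.parse_cfg.parse_env_cfg` is the helper that the workflow scripts use to build the configuration:

.. code-block:: python

   import gymnasium as gym

   from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg

   # parse the default configuration for the task (task name is illustrative)
   env_cfg = parse_env_cfg("Isaac-Cartpole-v0", device="cuda", num_envs=16)
   # fix the random seed; it is applied when the environment is constructed
   env_cfg.seed = 42
   # create the environment; PyTorch and NumPy are now seeded globally
   env = gym.make("Isaac-Cartpole-v0", cfg=env_cfg)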
The seed is set via the environment +parameters :attr:`omni.isaac.lab.envs.ManagerBasedEnvCfg.seed` or :attr:`omni.isaac.lab.envs.DirectRLEnvCfg.seed`, +for the manager-based and direct environment implementations respectively. + +For the results of our determinism testing for RL training, please check the GitHub Pull Request `#940`_. + +.. tip:: + + Due to GPU work scheduling, there's a possibility that runtime changes to simulation parameters + may alter the order in which operations take place. This occurs because environment updates can + happen while the GPU is occupied with other tasks. Due to the inherent nature of floating-point + numeric storage, any modification to the execution ordering can result in minor changes in the + least significant bits of output data. These changes may lead to divergent execution over the + course of simulating thousands of environments and simulation frames. + + An illustrative example of this issue is observed with the runtime domain randomization of an object's + physics materials. This process can introduce both determinism and simulation issues when executed + on the GPU due to the way these parameters are passed from the CPU to the GPU in the lower-level APIs. + Consequently, it is strongly advised to perform this operation only at setup time, before the + environment stepping commences. + + +.. _PhysX Determinism documentation: https://nvidia-omniverse.github.io/PhysX/physx/5.4.1/docs/API.html#determinism .. _#940: https://github.com/isaac-sim/IsaacLab/pull/940 diff --git a/docs/source/refs/issues.rst b/docs/source/refs/issues.rst index 4a760e6d85..9d9878bee2 100644 --- a/docs/source/refs/issues.rst +++ b/docs/source/refs/issues.rst @@ -36,35 +36,6 @@ over stepping different parts of the simulation app. However, at this point, the timeline for this feature request. -Non-determinism in physics simulation ------------------------------------- - -Due to GPU work scheduling, there's a possibility that runtime changes to simulation parameters -may alter the order in which operations take place. This occurs because environment updates can -happen while the GPU is occupied with other tasks. Due to the inherent nature of floating-point -numeric storage, any modification to the execution ordering can result in minor changes in the -least significant bits of output data. These changes may lead to divergent execution over the -course of simulating thousands of environments and simulation frames. - -An illustrative example of this issue is observed with the runtime domain randomization of object's -physics materials. This process can introduce both determinancy and simulation issues when executed -on the GPU due to the way these parameters are passed from the CPU to the GPU in the lower-level APIs. -Consequently, it is strongly advised to perform this operation only at setup time, before the -environment stepping commences. - -For more information, please refer to the `PhysX Determinism documentation`_. - -In addition, due to floating point precision, states across different environments in the simulation -may be non-deterministic when the same set of actions are applied to the same initial -states. This occurs as environments are placed further apart from the world origin at (0, 0, 0). -As actors get placed at different origins in the world, floating point errors may build up -and result in slight variance in results even when starting from the same initial states.
One -possible workaround for this issue is to place all actors/environments at the world origin -at (0, 0, 0) and filter out collisions between the environments. Note that this may induce -a performance degradation of around 15-50%, depending on the complexity of actors and -environment. - - Blank initial frames from the camera ------------------------------------ @@ -99,7 +70,6 @@ are stored in the instanceable asset's USD file and not in its stage reference's .. _instanceable assets: https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_instanceable_assets.html .. _Omniverse Isaac Sim documentation: https://docs.omniverse.nvidia.com/isaacsim/latest/known_issues.html -.. _PhysX Determinism documentation: https://nvidia-omniverse.github.io/PhysX/physx/5.3.1/docs/BestPractices.html#determinism Exiting the process diff --git a/docs/source/tutorials/01_assets/run_articulation.rst b/docs/source/tutorials/01_assets/run_articulation.rst index 061b768491..9fad5f2d90 100644 --- a/docs/source/tutorials/01_assets/run_articulation.rst +++ b/docs/source/tutorials/01_assets/run_articulation.rst @@ -49,7 +49,7 @@ an instance of the :class:`assets.Articulation` class by passing the configurati .. literalinclude:: ../../../../source/standalone/tutorials/01_assets/run_articulation.py :language: python - :start-at: # Create separate groups called "Origin1", "Origin2", "Origin3" + :start-at: # Create separate groups called "Origin1", "Origin2" :end-at: cartpole = Articulation(cfg=cartpole_cfg) diff --git a/source/extensions/omni.isaac.lab/config/extension.toml b/source/extensions/omni.isaac.lab/config/extension.toml index 5c79d89bc8..cc97090380 100644 --- a/source/extensions/omni.isaac.lab/config/extension.toml +++ b/source/extensions/omni.isaac.lab/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.22.9" +version = "0.22.10" # Description title = "Isaac Lab framework for Robot Learning" diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst index 3cb95d1830..1509420079 100644 --- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst @@ -1,6 +1,18 @@ Changelog --------- +0.22.10 (2024-09-09) +~~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added a seed parameter to the :attr:`omni.isaac.lab.envs.ManagerBasedEnvCfg` and :attr:`omni.isaac.lab.envs.DirectRLEnvCfg` + classes to set the seed for the environment. This seed is used to initialize the random number generator for the environment. +* Adapted the workflow scripts to set the seed for the environment using the seed specified in the learning agent's configuration + file or the command line argument. This ensures that the simulation results are reproducible across different runs. 
+ + 0.22.9 (2024-09-08) ~~~~~~~~~~~~~~~~~~~ diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py index d66b5a00f7..d75d7ec781 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env.py @@ -84,6 +84,12 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs # initialize internal variables self._is_closed = False + # set the seed for the environment + if self.cfg.seed is not None: + self.seed(self.cfg.seed) + else: + carb.log_warn("Seed not set for the environment. The environment creation may not be deterministic.") + # create a simulation context to control the simulator if SimulationContext.instance() is None: self.sim: SimulationContext = SimulationContext(self.cfg.sim) @@ -93,6 +99,7 @@ def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs # print useful information print("[INFO]: Base environment:") print(f"\tEnvironment device : {self.device}") + print(f"\tEnvironment seed : {self.cfg.seed}") print(f"\tPhysics step-size : {self.physics_dt}") print(f"\tRendering step-size : {self.physics_dt * self.cfg.sim.render_interval}") print(f"\tEnvironment step-size : {self.step_dt}") @@ -241,6 +248,10 @@ def max_episode_length(self): def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]: """Resets all the environments and returns observations. + This function calls the :meth:`_reset_idx` function to reset all the environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + Args: seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. options: Additional information to specify how the environment is reset. Defaults to None. @@ -254,13 +265,13 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) # set the seed if seed is not None: self.seed(seed) + # reset state of scene indices = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) self._reset_idx(indices) - obs = self._get_observations() # return observations - return obs, self.extras + return self._get_observations(), self.extras def step(self, action: torch.Tensor) -> VecEnvStepReturn: """Execute one time-step of the environment's dynamics. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py index 5ce1bb6108..7cafe18fe7 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/direct_rl_env_cfg.py @@ -41,6 +41,14 @@ class DirectRLEnvCfg: """ # general settings + seed: int | None = None + """The seed for the random number generator. Defaults to None, in which case the seed is not set. + + Note: + The seed is set at the beginning of the environment initialization. This ensures that the environment + creation is deterministic and behaves similarly across different runs. + """ + decimation: int = MISSING """Number of control action updates @ sim dt per policy dt. 
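To illustrate the new ``seed`` field, the sketch below shows the intended usage for a direct-workflow task. This is an illustrative example rather than part of this change: the cartpole classes and import path stand in for any environment whose configuration derives from :class:`DirectRLEnvCfg`:

.. code-block:: python

   # import path is illustrative; any DirectRLEnv task works the same way
   from omni.isaac.lab_tasks.direct.cartpole import CartpoleEnv, CartpoleEnvCfg

   cfg = CartpoleEnvCfg()
   cfg.seed = 42                # seeds PyTorch and NumPy globally in __init__ via set_seed()
   env = CartpoleEnv(cfg=cfg)   # prints "Environment seed : 42"

   # leaving cfg.seed as None instead logs a warning that environment
   # creation may not be deterministic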
diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env.py index b02f2faa73..31823fd779 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env.py @@ -74,6 +74,12 @@ def __init__(self, cfg: ManagerBasedEnvCfg): # initialize internal variables self._is_closed = False + # set the seed for the environment + if self.cfg.seed is not None: + self.seed(self.cfg.seed) + else: + carb.log_warn("Seed not set for the environment. The environment creation may not be deterministic.") + # create a simulation context to control the simulator if SimulationContext.instance() is None: # the type-annotation is required to avoid a type-checking error @@ -89,6 +95,7 @@ def __init__(self, cfg: ManagerBasedEnvCfg): # print useful information print("[INFO]: Base environment:") print(f"\tEnvironment device : {self.device}") + print(f"\tEnvironment seed : {self.cfg.seed}") print(f"\tPhysics step-size : {self.physics_dt}") print(f"\tRendering step-size : {self.physics_dt * self.cfg.sim.render_interval}") print(f"\tEnvironment step-size : {self.step_dt}") @@ -222,6 +229,10 @@ def load_managers(self): def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]: """Resets all the environments and returns observations. + This function calls the :meth:`_reset_idx` function to reset all the environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + Args: seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. options: Additional information to specify how the environment is reset. Defaults to None. @@ -235,9 +246,11 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) # set the seed if seed is not None: self.seed(seed) + # reset state of scene indices = torch.arange(self.num_envs, dtype=torch.int64, device=self.device) self._reset_idx(indices) + # return observations return self.observation_manager.compute(), self.extras diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env_cfg.py index 12a69ac551..4740a27466 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/envs/manager_based_env_cfg.py @@ -56,6 +56,14 @@ class ManagerBasedEnvCfg: """ # general settings + seed: int | None = None + """The seed for the random number generator. Defaults to None, in which case the seed is not set. + + Note: + The seed is set at the beginning of the environment initialization. This ensures that the environment + creation is deterministic and behaves similarly across different runs. + """ + decimation: int = MISSING """Number of control action updates @ sim dt per policy dt. diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator.py index b6b3aeb226..abd411a856 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator.py @@ -71,8 +71,8 @@ class TerrainGenerator: .. 
attention:: The terrain generation has its own seed parameter. This is set using the :attr:`TerrainGeneratorCfg.seed` - parameter. If the seed is not set and the caching is disabled, the terrain generation will not be - reproducible. + parameter. If the seed is not set and the caching is disabled, the terrain generation may not be + completely reproducible. """ @@ -124,10 +124,16 @@ def __init__(self, cfg: TerrainGeneratorCfg, device: str = "cpu"): " Please set the seed in the terrain generator configuration to make the generation reproducible." ) + # if the seed is not set, we assume there is a global seed set and use that. + # this ensures that the terrain is reproducible if the seed is set at the beginning of the program. + if self.cfg.seed is not None: + seed = self.cfg.seed + else: + seed = np.random.get_state()[1][0] # set the seed for reproducibility # note: we create a new random number generator to avoid affecting the global state # in the other places where random numbers are used. - self.np_rng = np.random.default_rng(self.cfg.seed) + self.np_rng = np.random.default_rng(seed) # buffer for storing valid patches self.flat_patches = {} diff --git a/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator_cfg.py b/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator_cfg.py index 0d23818fc2..5b692bfd81 100644 --- a/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator_cfg.py +++ b/source/extensions/omni.isaac.lab/omni/isaac/lab/terrains/terrain_generator_cfg.py @@ -105,8 +105,14 @@ class TerrainGeneratorCfg: """Configuration for the terrain generator.""" seed: int | None = None - """The seed for the random number generator. Defaults to None, - in which case the seed is not set.""" + """The seed for the random number generator. Defaults to None, in which case a seed is drawn from + NumPy's current global random state. + + When the seed is set, the random number generator is initialized with the given seed. This ensures + that the generated terrains are deterministic across different runs. If the seed is not set, the + generator falls back to NumPy's global random state, which assumes that a global seed has been set + elsewhere in the code. + """ curriculum: bool = False """Whether to use the curriculum mode. Defaults to False. diff --git a/source/extensions/omni.isaac.lab/test/terrains/test_terrain_generator.py b/source/extensions/omni.isaac.lab/test/terrains/test_terrain_generator.py index ccb6559798..c5bc2af69a 100644 --- a/source/extensions/omni.isaac.lab/test/terrains/test_terrain_generator.py +++ b/source/extensions/omni.isaac.lab/test/terrains/test_terrain_generator.py @@ -18,6 +18,8 @@ import torch import unittest +import omni.isaac.core.utils.torch as torch_utils + from omni.isaac.lab.terrains import FlatPatchSamplingCfg, TerrainGenerator, TerrainGeneratorCfg from omni.isaac.lab.terrains.config.rough import ROUGH_TERRAINS_CFG @@ -50,6 +52,44 @@ def test_generation(self): self.assertAlmostEqual(actualSize[0], expectedSizeX) self.assertAlmostEqual(actualSize[1], expectedSizeY) + def test_generation_reproducibility(self): + """Generates assorted terrains and tests that the resulting mesh is reproducible. + + We check two scenarios: one where the seed is set only globally, and one where it is set both + globally and locally. Setting the seed only locally is not tested, as it is not supported.
+ """ + for use_global_seed in [True, False]: + for seed in [20, 40, 80]: + with self.subTest(seed=seed): + # set initial seed + torch_utils.set_seed(seed) + + # create terrain generator + cfg = ROUGH_TERRAINS_CFG.copy() + cfg.use_cache = False + cfg.seed = seed if use_global_seed else None + terrain_generator = TerrainGenerator(cfg=cfg) + + # keep a copy of the generated terrain mesh + terrain_mesh_1 = terrain_generator.terrain_mesh.copy() + + # set seed again + torch_utils.set_seed(seed) + + # create terrain generator + terrain_generator = TerrainGenerator(cfg=cfg) + + # keep a copy of the generated terrain mesh + terrain_mesh_2 = terrain_generator.terrain_mesh.copy() + + # check if the meshes are equal + np.testing.assert_allclose( + terrain_mesh_1.vertices, terrain_mesh_2.vertices, atol=1e-5, err_msg="Vertices are not equal" + ) + np.testing.assert_allclose( + terrain_mesh_1.faces, terrain_mesh_2.faces, atol=1e-5, err_msg="Faces are not equal" + ) + def test_generation_cache(self): """Generate the terrain and check that caching works. @@ -79,9 +119,7 @@ def test_generation_cache(self): # set a random seed to disturb the process # this is to ensure that the seed inside the terrain generator makes deterministic results - np.random.seed(12456) - torch.manual_seed(12456) - torch.cuda.manual_seed_all(12456) + torch_utils.set_seed(12456) # create terrain generator with cache enabled terrain_generator = TerrainGenerator(cfg=cfg) diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environment_determinism.py b/source/extensions/omni.isaac.lab_tasks/test/test_environment_determinism.py new file mode 100644 index 0000000000..3346e8284d --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/test/test_environment_determinism.py @@ -0,0 +1,132 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Launch Isaac Sim Simulator first.""" + +from omni.isaac.lab.app import AppLauncher, run_tests + +# launch the simulator +app_launcher = AppLauncher(headless=True) +simulation_app = app_launcher.app + + +"""Rest everything follows.""" + +import gymnasium as gym +import torch +import unittest + +import carb +import omni.usd + +import omni.isaac.lab_tasks # noqa: F401 +from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg + + +class TestEnvironmentDeterminism(unittest.TestCase): + """Test cases for environment determinism. + + We make separate test cases for manipulation, locomotion, and dextrous manipulation environments. + This is because each of these environments has different simulation dynamics and different types of actions. + The test cases are run for different devices and seeds to ensure that the environment creation is deterministic. + """ + + @classmethod + def setUpClass(cls): + # this flag is necessary to prevent a bug where the simulation gets stuck randomly when running the + # test on many environments. + carb_settings_iface = carb.settings.get_settings() + carb_settings_iface.set_bool("/physics/cooking/ujitsoCollisionCooking", False) + + """ + Test fixtures. 
+ """ + + def test_manipulation_env_determinism(self): + """Check deterministic environment creation for manipulation.""" + for task_name in [ + "Isaac-Open-Drawer-Franka-v0", + "Isaac-Lift-Cube-Franka-v0", + ]: + for device in ["cuda", "cpu"]: + with self.subTest(task_name=task_name, device=device): + self._test_environment_determinism(task_name, device) + + def test_locomotion_env_determinism(self): + """Check deterministic environment creation for locomotion.""" + for task_name in [ + "Isaac-Velocity-Flat-Anymal-C-v0", + "Isaac-Velocity-Rough-Anymal-C-v0", + "Isaac-Velocity-Rough-Anymal-C-Direct-v0", + ]: + for device in ["cuda", "cpu"]: + with self.subTest(task_name=task_name, device=device): + self._test_environment_determinism(task_name, device) + + def test_dextrous_env_determinism(self): + """Check deterministic environment creation for dextrous manipulation.""" + for task_name in [ + "Isaac-Repose-Cube-Allegro-v0", + # "Isaac-Repose-Cube-Allegro-Direct-v0", # FIXME: @kellyg, any idea why it is not deterministic? + ]: + for device in ["cuda", "cpu"]: + with self.subTest(task_name=task_name, device=device): + self._test_environment_determinism(task_name, device) + + """ + Helper functions. + """ + + def _test_environment_determinism(self, task_name: str, device: str): + """Check deterministic environment creation.""" + # fix number of steps + num_envs = 32 + num_steps = 100 + # call function to create and step the environment + obs_1, rew_1 = self._obtain_transition_tuples(task_name, num_envs, device, num_steps) + obs_2, rew_2 = self._obtain_transition_tuples(task_name, num_envs, device, num_steps) + + # check everything is as expected + # -- rewards should be the same + torch.testing.assert_close(rew_1, rew_2) + # -- observations should be the same + for key in obs_1.keys(): + torch.testing.assert_close(obs_1[key], obs_2[key]) + + def _obtain_transition_tuples( + self, task_name: str, num_envs: int, device: str, num_steps: int + ) -> tuple[dict, torch.Tensor]: + """Run random actions and obtain transition tuples after fixed number of steps.""" + # create a new stage + omni.usd.get_context().new_stage() + # parse configuration + env_cfg = parse_env_cfg(task_name, device=device, num_envs=num_envs) + # set seed + env_cfg.seed = 42 + + # create environment + env = gym.make(task_name, cfg=env_cfg) + + # disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None # type: ignore + + # reset environment + obs, _ = env.reset() + # simulate environment for fixed steps + with torch.inference_mode(): + for _ in range(num_steps): + # sample actions from -1 to 1 + actions = 2 * torch.rand(env.action_space.shape, device=env.unwrapped.device) - 1 + # apply actions and get initial observation + obs, rewards = env.step(actions)[:2] + + # close the environment + env.close() + + return obs, rewards + + +if __name__ == "__main__": + run_tests() diff --git a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py index b265782a9a..1333948d57 100644 --- a/source/extensions/omni.isaac.lab_tasks/test/test_environments.py +++ b/source/extensions/omni.isaac.lab_tasks/test/test_environments.py @@ -18,6 +18,7 @@ import torch import unittest +import carb import omni.usd from omni.isaac.lab.envs import ManagerBasedRLEnv, ManagerBasedRLEnvCfg @@ -38,14 +39,17 @@ def setUpClass(cls): cls.registered_tasks.append(task_spec.id) # sort environments by name cls.registered_tasks.sort() - # print all existing task names - 
print(">>> All registered environments:", cls.registered_tasks) + + # this flag is necessary to prevent a bug where the simulation gets stuck randomly when running the + # test on many environments. + carb_settings_iface = carb.settings.get_settings() + carb_settings_iface.set_bool("/physics/cooking/ujitsoCollisionCooking", False) """ Test fixtures. """ - def test_multiple_instances_gpu(self): + def test_multiple_num_envs_on_gpu(self): """Run all environments with multiple instances and check environments return valid signals.""" # common parameters num_envs = 32 @@ -60,7 +64,7 @@ def test_multiple_instances_gpu(self): print(f">>> Closing environment: {task_name}") print("-" * 80) - def test_single_instance_gpu(self): + def test_single_env_on_gpu(self): """Run all environments with single instance and check environments return valid signals.""" # common parameters num_envs = 1 @@ -80,16 +84,16 @@ def test_single_instance_gpu(self): """ def _check_random_actions(self, task_name: str, device: str, num_envs: int, num_steps: int = 1000): - """Run random actions and check environments return valid signals.""" + """Run random actions and check environments returned signals are valid.""" # create a new stage omni.usd.get_context().new_stage() # parse configuration env_cfg: ManagerBasedRLEnvCfg = parse_env_cfg(task_name, device=device, num_envs=num_envs) # create environment env: ManagerBasedRLEnv = gym.make(task_name, cfg=env_cfg) - # this flag is necessary to prevent a bug where the simulation gets stuck randomly when running the - # test on many environments. - env.sim.set_setting("/physics/cooking/ujitsoCollisionCooking", False) + + # disable control on stop + env.unwrapped.sim._app_control_on_stop_handle = None # type: ignore # reset environment obs, _ = env.reset() diff --git a/source/standalone/workflows/rl_games/train.py b/source/standalone/workflows/rl_games/train.py index 672f09f9ea..9552593a85 100644 --- a/source/standalone/workflows/rl_games/train.py +++ b/source/standalone/workflows/rl_games/train.py @@ -88,6 +88,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # update env config device env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + # set the environment seed (after multi-gpu config for updated rank from agent seed) + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["params"]["seed"] + # specify directory for logging experiments log_root_path = os.path.join("logs", "rl_games", agent_cfg["params"]["config"]["name"]) log_root_path = os.path.abspath(log_root_path) @@ -139,8 +143,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): runner = Runner(IsaacAlgoObserver()) runner.load(agent_cfg) - # set seed of the env - env.seed(agent_cfg["params"]["seed"]) # reset the agent and env runner.reset() # train the agent diff --git a/source/standalone/workflows/rsl_rl/cli_args.py b/source/standalone/workflows/rsl_rl/cli_args.py index 32827ea47c..60d7af75aa 100644 --- a/source/standalone/workflows/rsl_rl/cli_args.py +++ b/source/standalone/workflows/rsl_rl/cli_args.py @@ -67,7 +67,7 @@ def update_rsl_rl_cfg(agent_cfg: RslRlOnPolicyRunnerCfg, args_cli: argparse.Name The updated configuration for RSL-RL agent based on inputs. 
""" # override the default configuration with CLI arguments - if args_cli.seed is not None: + if hasattr("args_cli", "seed") and args_cli.seed is not None: agent_cfg.seed = args_cli.seed if args_cli.resume is not None: agent_cfg.resume = args_cli.resume diff --git a/source/standalone/workflows/rsl_rl/play.py b/source/standalone/workflows/rsl_rl/play.py index 4beba6f40a..3020800beb 100644 --- a/source/standalone/workflows/rsl_rl/play.py +++ b/source/standalone/workflows/rsl_rl/play.py @@ -23,7 +23,6 @@ ) parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") parser.add_argument("--task", type=str, default=None, help="Name of the task.") -parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment") # append RSL-RL cli arguments cli_args.add_rsl_rl_args(parser) # append AppLauncher cli args diff --git a/source/standalone/workflows/rsl_rl/train.py b/source/standalone/workflows/rsl_rl/train.py index 4749cbaff5..898c097e69 100644 --- a/source/standalone/workflows/rsl_rl/train.py +++ b/source/standalone/workflows/rsl_rl/train.py @@ -76,6 +76,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolic args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations ) + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg.seed + # specify directory for logging experiments log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) log_root_path = os.path.abspath(log_root_path) @@ -114,9 +118,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolic # load previously trained model runner.load(resume_path) - # set seed of the environment - env.seed(agent_cfg.seed) - # dump the configuration into log-directory dump_yaml(os.path.join(log_dir, "params", "env.yaml"), env_cfg) dump_yaml(os.path.join(log_dir, "params", "agent.yaml"), agent_cfg) diff --git a/source/standalone/workflows/sb3/train.py b/source/standalone/workflows/sb3/train.py index f06653f7ea..47ecb2c3f7 100644 --- a/source/standalone/workflows/sb3/train.py +++ b/source/standalone/workflows/sb3/train.py @@ -72,6 +72,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): if args_cli.max_iterations is not None: agent_cfg["n_timesteps"] = args_cli.max_iterations * agent_cfg["n_steps"] * env_cfg.scene.num_envs + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = agent_cfg["seed"] + # directory for logging into log_dir = os.path.join("logs", "sb3", args_cli.task, datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) # dump the configuration into log-directory @@ -101,8 +105,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): env = gym.wrappers.RecordVideo(env, **video_kwargs) # wrap around environment for stable baselines env = Sb3VecEnvWrapper(env) - # set the seed - env.seed(seed=agent_cfg["seed"]) if "normalize_input" in agent_cfg: env = VecNormalize( diff --git a/source/standalone/workflows/skrl/train.py b/source/standalone/workflows/skrl/train.py index 6ef94352fb..6e3fa11898 100644 --- a/source/standalone/workflows/skrl/train.py +++ b/source/standalone/workflows/skrl/train.py @@ -88,6 +88,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # override configurations with non-hydra CLI arguments 
env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs set_seed(args_cli.seed if args_cli.seed is not None else agent_cfg["seed"]) + # multi-gpu training config if args_cli.distributed: if args_cli.ml_framework.startswith("jax"): @@ -101,6 +102,10 @@ if args_cli.ml_framework.startswith("jax"): skrl.config.jax.backend = "jax" if args_cli.ml_framework == "jax" else "numpy" + # set the environment seed + # note: certain randomizations occur in the environment initialization so we set the seed here + env_cfg.seed = args_cli.seed if args_cli.seed is not None else agent_cfg["seed"] + # specify directory for logging experiments log_root_path = os.path.join("logs", "skrl", agent_cfg["agent"]["experiment"]["directory"]) log_root_path = os.path.abspath(log_root_path) diff --git a/tools/per_test_timeouts.py b/tools/per_test_timeouts.py index 2bebd99601..17e3447858 100644 --- a/tools/per_test_timeouts.py +++ b/tools/per_test_timeouts.py @@ -9,6 +9,7 @@ """ PER_TEST_TIMEOUTS = { "test_environments.py": 1200, # This test runs through all the environments for 100 steps each + "test_environment_determinism.py": 200, # This test runs through many of the environments for 100 steps each "test_env_rendering_logic.py": 300, "test_rsl_rl_wrapper.py": 200, "test_sb3_wrapper.py": 200,
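One detail of the ``TerrainGenerator`` change above is worth spelling out: when ``cfg.seed`` is ``None``, the generator derives a seed from NumPy's legacy global random state, so a single global ``set_seed()`` call at program start still makes terrain generation reproducible. A minimal standalone sketch of this fallback (assuming the default MT19937 global state):

.. code-block:: python

   import numpy as np

   # global seed, e.g. set once at program start via torch_utils.set_seed()
   np.random.seed(42)

   # np.random.get_state() returns ("MT19937", keys, pos, ...); the first
   # element of the key array is a cheap, seed-dependent value
   seed = np.random.get_state()[1][0]

   # a local generator isolated from the global state, as in TerrainGenerator
   rng = np.random.default_rng(seed)
   print(rng.integers(0, 100, size=3))  # identical output for the same global seed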