ALRhub · D-o-d-o-x · Oct 11, 2023 · Oct 20, 2022 · Jan 12, 2023 · Jan 12, 2023
diff --git a/README.md b/README.md
diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
@@ -1,13 +1,17 @@
 from fancy_gym import dmc, meta, open_ai
-from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
-from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-# Convenience function for all MP environments
-from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from fancy_gym import envs as fancy
+from fancy_gym.utils.make_env_helpers import make_bb
+from .envs.registry import register, upgrade
+from .envs.registry import ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS, MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS
 
-ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
-    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
-    for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['dm_control']
+ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['fancy']
+ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['metaworld']
+ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS = MOVEMENT_PRIMITIVE_ENVIRONMENTS_FOR_NS['gym']
+
+
+def make(*args, **kwargs):
+    """
+    As part of the refactor of Fancy Gym and the upgrade to Gymnasium, the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details, check out the GitHub README. If your codebase was built for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.
+    """
+    raise Exception('As part of the refactor of Fancy Gym and upgrade to gymnasium the use of fancy_gym.make has been discontinued. Regular gym.make should be used instead. For more details check out the github README. If your codebase was build for older versions of Fancy Gym and relies on the old behavior and dependency versions, please check out the legacy branch.')
diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
@@ -1,8 +1,9 @@
-from typing import Tuple, Optional, Callable
+from typing import Tuple, Optional, Callable, Dict, Any
 
-import gym
+import gymnasium as gym
 import numpy as np
-from gym import spaces
+from gymnasium import spaces
+from gymnasium.core import ObsType
 from mp_pytorch.mp.mp_interfaces import MPInterface
 
 from fancy_gym.black_box.controller.base_controller import BaseController
@@ -67,7 +68,8 @@ def __init__(self,
         self.reward_aggregation = reward_aggregation
 
         # spaces
-        self.return_context_observation = not (learn_sub_trajectories or self.do_replanning)
+        self.return_context_observation = not (
+            learn_sub_trajectories or self.do_replanning)
         self.traj_gen_action_space = self._get_traj_gen_action_space()
         self.action_space = self._get_action_space()
         self.observation_space = self._get_observation_space()
@@ -99,14 +101,17 @@ def get_trajectory(self, action: np.ndarray) -> Tuple:
             # If we do not do this, the traj_gen assumes we are continuing the trajectory.
             self.traj_gen.reset()
 
-        clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
+        clipped_params = np.clip(
+            action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
         self.traj_gen.set_params(clipped_params)
-        init_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
+        init_time = np.array(
+            0 if not self.do_replanning else self.current_traj_steps * self.dt)
 
-        condition_pos = self.condition_pos if self.condition_pos is not None else self.current_pos
-        condition_vel = self.condition_vel if self.condition_vel is not None else self.current_vel
+        condition_pos = self.condition_pos if self.condition_pos is not None else self.env.get_wrapper_attr('current_pos')
+        condition_vel = self.condition_vel if self.condition_vel is not None else self.env.get_wrapper_attr('current_vel')
 
-        self.traj_gen.set_initial_conditions(init_time, condition_pos, condition_vel)
+        self.traj_gen.set_initial_conditions(
+            init_time, condition_pos, condition_vel)
         self.traj_gen.set_duration(duration, self.dt)
 
         position = get_numpy(self.traj_gen.get_traj_pos())
@@ -153,24 +158,27 @@ def step(self, action: np.ndarray):
         trajectory_length = len(position)
         rewards = np.zeros(shape=(trajectory_length,))
         if self.verbose >= 2:
-            actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
+            actions = np.zeros(shape=(trajectory_length,) +
+                               self.env.action_space.shape)
             observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
                                     dtype=self.env.observation_space.dtype)
 
         infos = dict()
         done = False
 
         if not traj_is_valid:
-            obs, trajectory_return, done, infos = self.env.invalid_traj_callback(action, position, velocity,
-                                                                                 self.return_context_observation,
-                                                                                 self.tau_bound, self.delay_bound)
-            return self.observation(obs), trajectory_return, done, infos
+            obs, trajectory_return, terminated, truncated, infos = self.env.invalid_traj_callback(action, position, velocity,
+                                                                                                  self.return_context_observation, self.tau_bound, self.delay_bound)
+            return self.observation(obs), trajectory_return, terminated, truncated, infos
 
         self.plan_steps += 1
         for t, (pos, vel) in enumerate(zip(position, velocity)):
-            step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
-            c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
-            obs, c_reward, done, info = self.env.step(c_action)
+            step_action = self.tracking_controller.get_action(
+                pos, vel, self.env.get_wrapper_attr('current_pos'), self.env.get_wrapper_attr('current_vel'))
+            c_action = np.clip(
+                step_action, self.env.action_space.low, self.env.action_space.high)
+            obs, c_reward, terminated, truncated, info = self.env.step(
+                c_action)
             rewards[t] = c_reward
 
             if self.verbose >= 2:
@@ -185,9 +193,7 @@ def step(self, action: np.ndarray):
             if self.render_kwargs:
                 self.env.render(**self.render_kwargs)
 
-            if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
-                                                t + 1 + self.current_traj_steps)
-                    and self.plan_steps < self.max_planning_times):
+            if terminated or truncated or (self.replanning_schedule(self.env.get_wrapper_attr('current_pos'), self.env.get_wrapper_attr('current_vel'), obs, c_action, t + 1 + self.current_traj_steps) and self.plan_steps < self.max_planning_times):
 
                 if self.condition_on_desired:
                     self.condition_pos = pos
@@ -207,17 +213,18 @@ def step(self, action: np.ndarray):
 
         infos['trajectory_length'] = t + 1
         trajectory_return = self.reward_aggregation(rewards[:t + 1])
-        return self.observation(obs), trajectory_return, done, infos
+        return self.observation(obs), trajectory_return, terminated, truncated, infos
 
     def render(self, **kwargs):
         """Only set render options here, such that they can be used during the rollout.
         This only needs to be called once"""
         self.render_kwargs = kwargs
 
-    def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
+    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \
+            -> Tuple[ObsType, Dict[str, Any]]:
         self.current_traj_steps = 0
         self.plan_steps = 0
         self.traj_gen.reset()
         self.condition_pos = None
         self.condition_vel = None
-        return super(BlackBoxWrapper, self).reset()
+        return super(BlackBoxWrapper, self).reset(seed=seed, options=options)
diff --git a/fancy_gym/black_box/factory/controller_factory.py b/fancy_gym/black_box/factory/controller_factory.py
@@ -11,11 +11,11 @@ def get_controller(controller_type: str, **kwargs):
     if controller_type == "motor":
         return PDController(**kwargs)
     elif controller_type == "velocity":
-        return VelController()
+        return VelController(**kwargs)
     elif controller_type == "position":
-        return PosController()
+        return PosController(**kwargs)
     elif controller_type == "metaworld":
-        return MetaWorldController()
+        return MetaWorldController(**kwargs)
     else:
         raise ValueError(f"Specified controller type {controller_type} not supported, "
                          f"please choose one of {ALL_TYPES}.")
diff --git a/fancy_gym/black_box/raw_interface_wrapper.py b/fancy_gym/black_box/raw_interface_wrapper.py
@@ -1,6 +1,6 @@
 from typing import Union, Tuple
 
-import gym
+import gymnasium as gym
 import numpy as np
 from mp_pytorch.mp.mp_interfaces import MPInterface
 
@@ -114,7 +114,8 @@ def invalid_traj_callback(self, action: np.ndarray, pos_traj: np.ndarray, vel_tr
         Returns:
             obs: artificial observation if the trajectory is invalid, by default a zero vector
             reward: artificial reward if the trajectory is invalid, by default 0
-            done: artificial done if the trajectory is invalid, by default True
+            terminated: artificial terminated flag if the trajectory is invalid, by default True
+            truncated: artificial truncated flag if the trajectory is invalid, by default False
             info: artificial info if the trajectory is invalid, by default empty dict
         """
-        return np.zeros(1), 0, True, {}
+        return np.zeros(1), 0, True, False, {}
diff --git a/fancy_gym/dmc/README.MD b/fancy_gym/dmc/README.MD
@@ -1,19 +1,19 @@
 # DeepMind Control (DMC) Wrappers
 
-These are the Environment Wrappers for selected 
-[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) 
+These are the Environment Wrappers for selected
+[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
 environments in order to use our Motion Primitive gym interface with them.
 
 ## MP Environments
 
 [//]: <> (These environments are wrapped-versions of their Deep Mind Control Suite &#40;DMC&#41; counterparts. Given most task can be)
 [//]: <> (solved in shorter horizon lengths than the original 1000 steps, we often shorten the episodes for those task.)
 
-|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension
-|---|---|---|---|---|
-|`dmc_ball_in_cup-catch_promp-v0`| A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2
-|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000| 10 | 2
-|`dmc_reacher-easy_promp-v0`| A ProMP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4
-|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000| 10 | 4
-|`dmc_reacher-hard_promp-v0`| A ProMP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4
-|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4
+| Name                                     | Description                                                                    | Trajectory Horizon | Action Dimension | Context Dimension |
+| ---------------------------------------- | ------------------------------------------------------------------------------ | ------------------ | ---------------- | ----------------- |
+| `dm_control_ProMP/ball_in_cup-catch-v0`  | A ProMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000               | 10               | 2                 |
+| `dm_control_DMP/ball_in_cup-catch-v0`    | A DMP wrapped version of the "catch" task for the "ball_in_cup" environment.   | 1000               | 10               | 2                 |
+| `dm_control_ProMP/reacher-easy-v0`       | A ProMP wrapped version of the "easy" task for the "reacher" environment.      | 1000               | 10               | 4                 |
+| `dm_control_DMP/reacher-easy-v0`         | A DMP wrapped version of the "easy" task for the "reacher" environment.        | 1000               | 10               | 4                 |
+| `dm_control_ProMP/reacher-hard-v0`       | A ProMP wrapped version of the "hard" task for the "reacher" environment.      | 1000               | 10               | 4                 |
+| `dm_control_DMP/reacher-hard-v0`         | A DMP wrapped version of the "hard" task for the "reacher" environment.        | 1000               | 10               | 4                 |