From 383e2714450d91bdd35160e7e4384951e3160c5b Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Wed, 25 May 2022 15:15:26 +0200 Subject: [PATCH 01/37] Add dm_control to requirements --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 69e1b32a..f6b6ec6c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -63,6 +63,8 @@ rna = viennarna>=2.4.18 dataclasses Distance +dm_control = + dm_control>=1.0.3 mario = torch>=1.9.0 Pillow>=8.3.1 From 05f5058cbf9aa0a2ba521116bc4cef993ab73959 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Wed, 25 May 2022 17:28:24 +0200 Subject: [PATCH 02/37] Add/transform dm_control/Mujoco into CARLEnv We could also use the Mujoco envs from gym, but they don't have every env and this way we have more options to modify the envs (it is at least more obvious). There are still some open todos. --- carl/envs/dmc/__init__.py | 0 carl/envs/dmc/carl_dmcontrol.py | 178 ++++++++++++++++++++++++++++++++ carl/envs/dmc/tasks.py | 47 +++++++++ carl/envs/dmc/wrappers.py | 110 ++++++++++++++++++++ 4 files changed, 335 insertions(+) create mode 100644 carl/envs/dmc/__init__.py create mode 100644 carl/envs/dmc/carl_dmcontrol.py create mode 100644 carl/envs/dmc/tasks.py create mode 100644 carl/envs/dmc/wrappers.py diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py new file mode 100644 index 00000000..cbfe9d65 --- /dev/null +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -0,0 +1,178 @@ +import warnings +from typing import Any, Dict, List, TypeVar, Union, Optional + +import gym +from gym.envs.classic_control import CartPoleEnv + +from carl.envs.carl_env import CARLEnv +from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.utils.trial_logger import TrialLogger +from carl.context.selection import AbstractSelector + + +ObsType = TypeVar("ObsType") +ActType = TypeVar("ActType") + + +""" +Physics options (defaults for CartPole): +| apirate = 100.0 │ +│ collision = 0 │ +│ cone = 0 │ +│ density = 0.0 │ +│ disableflags = 0 │ +│ enableflags = 0 │ +│ gravity = array([ 0. , 0. , -9.81]) │ +│ impratio = 1.0 │ +│ integrator = 0 │ +│ iterations = 100 │ +│ jacobian = 2 │ +│ magnetic = array([ 0. , -0.5, 0. ]) │ +│ mpr_iterations = 50 │ +│ mpr_tolerance = 1e-06 │ +│ noslip_iterations = 0 │ +│ noslip_tolerance = 1e-06 │ +│ o_margin = 0.0 │ +│ o_solimp = array([9.0e-01, 9.5e-01, 1.0e-03, 5.0e-01, 2.0e+00]) │ +│ o_solref = array([0.02, 1. 
]) │ +│ solver = 2 │ +│ timestep = 0.0025 │ +│ tolerance = 1e-08 │ +│ viscosity = 0.0 │ +│ wind = array([0., 0., 0.]) | + + +C++ Implementation: +https://mujoco.readthedocs.io/en/latest/APIreference.html#mjoption +struct _mjOption // physics options +{ + // timing parameters + mjtNum timestep; // timestep + mjtNum apirate; // update rate for remote API (Hz) + + // solver parameters + mjtNum impratio; // ratio of friction-to-normal contact impedance + mjtNum tolerance; // main solver tolerance + mjtNum noslip_tolerance; // noslip solver tolerance + mjtNum mpr_tolerance; // MPR solver tolerance + + // physical constants + mjtNum gravity[3]; // gravitational acceleration + mjtNum wind[3]; // wind (for lift, drag and viscosity) + mjtNum magnetic[3]; // global magnetic flux + mjtNum density; // density of medium + mjtNum viscosity; // viscosity of medium + + // override contact solver parameters (if enabled) + mjtNum o_margin; // margin + mjtNum o_solref[mjNREF]; // solref + mjtNum o_solimp[mjNIMP]; // solimp + + // discrete settings + int integrator; // integration mode (mjtIntegrator) + int collision; // collision mode (mjtCollision) + int cone; // type of friction cone (mjtCone) + int jacobian; // type of Jacobian (mjtJacobian) + int solver; // solver algorithm (mjtSolver) + int iterations; // maximum number of main solver iterations + int noslip_iterations; // maximum number of noslip solver iterations + int mpr_iterations; // maximum number of MPR solver iterations + int disableflags; // bit flags for disabling standard features + int enableflags; // bit flags for enabling optional features +}; +typedef struct _mjOption mjOption; +""" +TIMING_PARAMETERS = [ + "timestep", # timestep + "apirate", # update rate for remote API (Hz) +] +SOLVER_PARAMETERS = [ + "impratio", # ratio of friction-to-normal contact impedance + "tolerance", # main solver tolerance + "noslip_tolerance", # noslip solver tolerance + "mpr_tolerance", # MPR solver tolerance +] +PHYSICAL_CONSTANTS = [ + "gravity", + "wind", + "magnetic", + "density", + "viscosity", +] +OVERRIDE_CONTACT_SOLVER_PARAMETERS = [ # (if enabled) + "o_margin", # margin + "o_solref", # solref + "o_solimp", # solimp +] +DISCRETE_SETTINGS = [ + "integrator", # integration mode (mjtIntegrator) + "collision", # collision mode (mjtCollision) + "cone", # type of friction cone (mjtCone) + "jacobian", # type of Jacobian (mjtJacobian) + "solver", # solver algorithm (mjtSolver) + "iterations", # maximum number of main solver iterations + "noslip_iterations", # maximum number of noslip solver iterations + "mpr_iterations", # maximum number of MPR solver iterations + "disableflags", # bit flags for disabling standard features + "enableflags", # bit flags for enabling optional features +] + +WORLD_PARAMETERS = ( + TIMING_PARAMETERS + + SOLVER_PARAMETERS + + PHYSICAL_CONSTANTS + + OVERRIDE_CONTACT_SOLVER_PARAMETERS + + TIMING_PARAMETERS +) + + +DEFAULT_CONTEXT = {} + + +class CARLDmc(CARLEnv): + def __init__( + self, + env: gym.Env = CartPoleEnv(), + contexts: Dict[Any, Dict[Any, Any]] = {}, + hide_context: bool = True, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = True, + context_selector: 
Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + # TODO can we have more than 1 env? + env = MujocoToGymWrapper(env) + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values + + def _update_context(self) -> None: + # TODO change parameters of moving model (actuator force etc) + for context_name, context_value in self.context: + if context_name in WORLD_PARAMETERS: + setattr(self.env.physics.model.opt, context_name, context_value) + else: + warnings.warn(f"Unknown context feature {context_name}.") diff --git a/carl/envs/dmc/tasks.py b/carl/envs/dmc/tasks.py new file mode 100644 index 00000000..25dd9b39 --- /dev/null +++ b/carl/envs/dmc/tasks.py @@ -0,0 +1,47 @@ +from dm_control import suite + +from carl.envs.dmc.carl_dmcontrol import CARLDmc + + +def load_dmc_env(domain_name, task_name, task_kwargs=None, environment_kwargs=None, + visualize_reward=False): + return suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + environment_kwargs=environment_kwargs, + visualize_reward=visualize_reward, + ) + + +def load_dmc_cartpole(): + return load_dmc_env(domain_name="cartpole", task_name="swingup") + +# TODO Find a good method how to define tasks. Define classes? Better, create an automatic class constructor + + +if __name__ == "__main__": + # Load one task: + env = load_dmc_cartpole() + carl_env = CARLDmc(env=env) + + s = carl_env.reset() + done = False + while not done: + action = carl_env.action_space.sample() + state, reward, done, info = carl_env.step(action=action) + print(reward, done) + + # # Iterate over a task set: + # for domain_name, task_name in suite.BENCHMARKING: + # env = suite.load(domain_name, task_name) + # + # # Step through an episode and print out reward, discount and observation. 
+ # action_spec = env.action_spec() + # time_step = env.reset() + # while not time_step.last(): + # action = np.random.uniform( + # action_spec.minimum, action_spec.maximum, size=action_spec.shape + # ) + # time_step = env.step(action) + # print(time_step.reward, time_step.discount, time_step.observation) diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py new file mode 100644 index 00000000..52e6c7e5 --- /dev/null +++ b/carl/envs/dmc/wrappers.py @@ -0,0 +1,110 @@ +from typing import Tuple, Optional, Union + +import dm_control.rl +import gym +import numpy as np +from dm_env import StepType +from gym import spaces + +from carl.envs.dmc.carl_dmcontrol import ActType, ObsType + + +class MujocoToGymWrapper(gym.Env): + def __init__(self, env: dm_control.rl.control.Environment): + # TODO set seeds + self.env = env + + action_spec = self.env.action_spec() + self.action_space = spaces.Box(action_spec.minimum, action_spec.maximum, dtype=action_spec.dtype) + + obs_spec = self.env.observation_spec() + # obs_spaces = { + # k: spaces.Box(low=-np.inf, high=np.inf, shape=v.shape, dtype=v.dtype) + # for k, v in obs_spec.items() + # } + # self.observation_space = spaces.Dict(spaces=obs_spaces) + # TODO add support for Dict Spaces in CARLEnv (later) + shapes = [int(np.sum([v.shape for v in obs_spec.values()]))] + lows = np.array([-np.inf] * shapes[0]) + highs = np.array([np.inf] * shapes[0]) + dtype = np.unique([[v.dtype for v in obs_spec.values()]])[0] + self.observation_space = spaces.Box(low=lows, high=highs, shape=shapes, dtype=dtype) + + def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: + """Run one timestep of the environment's dynamics. When end of + episode is reached, you are responsible for calling `reset()` + to reset this environment's state. + + Accepts an action and returns a tuple (observation, reward, done, info). + + Args: + action (object): an action provided by the agent + + Returns: + observation (object): agent's observation of the current environment + reward (float) : amount of reward returned after previous action + done (bool): whether the episode has ended, in which case further step() calls will return undefined results + info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning) + """ + timestep = self.env.step(action=action) + step_type: StepType = timestep.step_type + reward = timestep.reward + discount = timestep.discount + observation = timestep.observation + info = { + "step_type": step_type, + "discount": discount + } + done = step_type == StepType.LAST + return observation, reward, done, info + + def reset( + self, + *, + seed: Optional[int] = None, + return_info: bool = False, + options: Optional[dict] = None, + ) -> Union[ObsType, tuple[ObsType, dict]]: + super(MujocoToGymWrapper, self).reset(seed=seed, return_info=return_info, options=options) + timestep = self.env.reset() + return timestep.observation + + def render(self, mode="human"): + """Renders the environment. + + The set of supported modes varies per environment. (And some + third-party environments may not support rendering at all.) + By convention, if mode is: + + - human: render to the current display or terminal and + return nothing. Usually for human consumption. + - rgb_array: Return an numpy.ndarray with shape (x, y, 3), + representing RGB values for an x-by-y pixel image, suitable + for turning into a video. + - ansi: Return a string (str) or StringIO.StringIO containing a + terminal-style text representation. 
The text can include newlines + and ANSI escape sequences (e.g. for colors). + + Note: + Make sure that your class's metadata 'render_modes' key includes + the list of supported modes. It's recommended to call super() + in implementations to use the functionality of this method. + + Args: + mode (str): the mode to render with + + Example: + + class MyEnv(Env): + metadata = {'render_modes': ['human', 'rgb_array']} + + def render(self, mode='human'): + if mode == 'rgb_array': + return np.array(...) # return RGB frame suitable for video + elif mode == 'human': + ... # pop up a window and render + else: + super(MyEnv, self).render(mode=mode) # just raise an exception + """ + # TODO render mujoco + pass \ No newline at end of file From 594eff636496590a6b5bbba76fb8469dedee6b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 7 Jun 2022 11:47:52 +0200 Subject: [PATCH 03/37] adapt observations and render rgb --- carl/envs/dmc/wrappers.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index 52e6c7e5..2f15f90e 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -1,4 +1,4 @@ -from typing import Tuple, Optional, Union +from typing import Tuple, Optional, Union, TypeVar import dm_control.rl import gym @@ -6,8 +6,8 @@ from dm_env import StepType from gym import spaces -from carl.envs.dmc.carl_dmcontrol import ActType, ObsType - +ObsType = TypeVar("ObsType") +ActType = TypeVar("ActType") class MujocoToGymWrapper(gym.Env): def __init__(self, env: dm_control.rl.control.Environment): @@ -50,7 +50,10 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: step_type: StepType = timestep.step_type reward = timestep.reward discount = timestep.discount - observation = timestep.observation + if isinstance(self.observation_space, spaces.Box): + observation = self.observation_to_box(timestep.observation) + else: + raise NotImplementedError info = { "step_type": step_type, "discount": discount @@ -67,7 +70,20 @@ def reset( ) -> Union[ObsType, tuple[ObsType, dict]]: super(MujocoToGymWrapper, self).reset(seed=seed, return_info=return_info, options=options) timestep = self.env.reset() - return timestep.observation + if isinstance(self.observation_space, spaces.Box): + observation = self.observation_to_box(timestep.observation) + else: + raise NotImplementedError + return observation + + def observation_to_box(self, observation): + observations = [] + for v in observation.values(): + observations.extend(v) + observation_array = np.array(observations, dtype=self.observation_space.dtype) + # TODO make sure observations are within bounds + # observation_array = np.clip(observation_array, self.observation_space.low, self.observation_space.high) + return observation_array def render(self, mode="human"): """Renders the environment. 
@@ -106,5 +122,11 @@ def render(self, mode='human'): else: super(MyEnv, self).render(mode=mode) # just raise an exception """ - # TODO render mujoco - pass \ No newline at end of file + # TODO render mujoco human version + + if mode == "human": + raise NotImplementedError + elif mode == "rgb_array": + return self.env._physics.render(camera_id=1) + else: + raise NotImplementedError From 1efa11e1f05706aa9e9f69e648dc9283d5d3b642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 7 Jun 2022 15:46:06 +0200 Subject: [PATCH 04/37] change dmc load, simplify wrapper --- carl/envs/dmc/carl_dm_cartpole.py | 73 +++++++++++++++++++++++++++++++ carl/envs/dmc/carl_dmcontrol.py | 38 ++++++++++------ carl/envs/dmc/wrappers.py | 15 ++----- 3 files changed, 101 insertions(+), 25 deletions(-) create mode 100644 carl/envs/dmc/carl_dm_cartpole.py diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py new file mode 100644 index 00000000..48e44ec3 --- /dev/null +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -0,0 +1,73 @@ +from typing import Any, Dict, List, Optional, Union + +import gym + +from carl.utils.trial_logger import TrialLogger +from carl.context.selection import AbstractSelector +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv, DEFAULT_CONTEXT, load_dmc_env + + +class CARLDmcCartpoleEnv(CARLDmcEnv): + def __init__( + self, + domain: str = "cartpole", + task: str = "swingup", + contexts: Dict[Any, Dict[Any, Any]] = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + if dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=domain, task_name=task, environment_kwargs={"flat_observation": True}) + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + + def _update_context(self) -> None: + pass + # self.env.gravity = self.context["gravity"] + # print(self.env.env._physics) + # print(self.env.env.__dict__) + + # "gravity", + # "wind", + # "magnetic", + # "density", + # "viscosity", + # high = np.array( + # [ + # self.env.x_threshold * 2, + # np.finfo(np.float32).max, + # self.env.theta_threshold_radians * 2, + # np.finfo(np.float32).max, + # ], + # dtype=np.float32, + # ) + # low = -high + # print(low) + # print(high) + # self.build_observation_space(low, high, CONTEXT_BOUNDS) diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index cbfe9d65..d3354884 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ 
b/carl/envs/dmc/carl_dmcontrol.py @@ -1,6 +1,9 @@ import warnings from typing import Any, Dict, List, TypeVar, Union, Optional +import numpy as np +from dm_control import suite + import gym from gym.envs.classic_control import CartPoleEnv @@ -9,9 +12,10 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector +from carl.envs.dmc.wrappers import ActType, ObsType -ObsType = TypeVar("ObsType") -ActType = TypeVar("ActType") +# ObsType = TypeVar("ObsType") +# ActType = TypeVar("ActType") """ @@ -126,13 +130,19 @@ ) -DEFAULT_CONTEXT = {} +DEFAULT_CONTEXT = { + "gravity": -9.81 +} + +CONTEXT_BOUNDS = { + "gravity": (0.1, np.inf, float) +} -class CARLDmc(CARLEnv): +class CARLDmcEnv(CARLEnv): def __init__( self, - env: gym.Env = CartPoleEnv(), + env: gym.Env, contexts: Dict[Any, Dict[Any, Any]] = {}, hide_context: bool = True, add_gaussian_noise_to_context: bool = False, @@ -142,7 +152,7 @@ def __init__( default_context: Optional[Dict] = DEFAULT_CONTEXT, max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py state_context_features: Optional[List[str]] = None, - dict_observation_space: bool = True, + dict_observation_space: bool = False, context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, context_selector_kwargs: Optional[Dict] = None, ): @@ -169,10 +179,12 @@ def __init__( DEFAULT_CONTEXT.keys() ) # allow to augment all values - def _update_context(self) -> None: - # TODO change parameters of moving model (actuator force etc) - for context_name, context_value in self.context: - if context_name in WORLD_PARAMETERS: - setattr(self.env.physics.model.opt, context_name, context_value) - else: - warnings.warn(f"Unknown context feature {context_name}.") +def load_dmc_env(domain_name, task_name, task_kwargs=None, environment_kwargs=None, + visualize_reward=False): + return suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + environment_kwargs=environment_kwargs, + visualize_reward=visualize_reward, + ) diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index 2f15f90e..b3ec2c0e 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -51,7 +51,7 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: reward = timestep.reward discount = timestep.discount if isinstance(self.observation_space, spaces.Box): - observation = self.observation_to_box(timestep.observation) + observation = timestep.observation["observations"] else: raise NotImplementedError info = { @@ -71,20 +71,11 @@ def reset( super(MujocoToGymWrapper, self).reset(seed=seed, return_info=return_info, options=options) timestep = self.env.reset() if isinstance(self.observation_space, spaces.Box): - observation = self.observation_to_box(timestep.observation) + observation = timestep.observation["observations"] else: raise NotImplementedError return observation - def observation_to_box(self, observation): - observations = [] - for v in observation.values(): - observations.extend(v) - observation_array = np.array(observations, dtype=self.observation_space.dtype) - # TODO make sure observations are within bounds - # observation_array = np.clip(observation_array, self.observation_space.low, self.observation_space.high) - return observation_array - def render(self, mode="human"): """Renders the environment. 
@@ -127,6 +118,6 @@ def render(self, mode='human'): if mode == "human": raise NotImplementedError elif mode == "rgb_array": - return self.env._physics.render(camera_id=1) + return self.env.physics.render(camera_id=1) else: raise NotImplementedError From dcbdeccb62727b2034cd9bfabd5bbd1028debb72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Wed, 8 Jun 2022 15:49:09 +0200 Subject: [PATCH 05/37] dmc initializations --- carl/envs/__init__.py | 9 +++++++++ carl/envs/dmc/__init__.py | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/carl/envs/__init__.py b/carl/envs/__init__.py index 8cc25f02..6131d0a0 100644 --- a/carl/envs/__init__.py +++ b/carl/envs/__init__.py @@ -41,3 +41,12 @@ warnings.warn( "Module 'RNA' not found. Please follow installation guide for RNA environment." ) + +dm_control_spec = importlib.util.find_spec("dm_control") +found = dm_control_spec is not None +if found: + from carl.envs.dmc import * +else: + warnings.warn( + "Module 'dm_control' not found. If you want to use these environments, please follow the installation guide." + ) diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index e69de29b..59b761f5 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -0,0 +1,5 @@ +# Contexts and bounds by name +from carl.envs.dmc.carl_dm_cartpole import ( + DEFAULT_CONTEXT as CARLDmcCartPoleEnv_defaults, +) +from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv From 4c486dc740c526c4f8788df3b074cb78ebf3ca4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Wed, 8 Jun 2022 15:51:11 +0200 Subject: [PATCH 06/37] refactor, load context into dmc cartpole --- carl/envs/dmc/carl_dm_cartpole.py | 102 ++++++++++---- carl/envs/dmc/carl_dmcontrol.py | 104 ++++++-------- carl/envs/dmc/dmc_tasks/cartpole.py | 206 ++++++++++++++++++++++++++++ carl/envs/dmc/utils.py | 35 +++++ carl/envs/dmc/wrappers.py | 4 +- 5 files changed, 362 insertions(+), 89 deletions(-) create mode 100644 carl/envs/dmc/dmc_tasks/cartpole.py create mode 100644 carl/envs/dmc/utils.py diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py index 48e44ec3..e930f309 100644 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -1,17 +1,74 @@ from typing import Any, Dict, List, Optional, Union -import gym +import numpy as np from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector -from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv, DEFAULT_CONTEXT, load_dmc_env +from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.envs.dmc.utils import load_dmc_env +from carl.envs.carl_env import CARLEnv -class CARLDmcCartpoleEnv(CARLDmcEnv): +""" +Physics options (defaults for CartPole): +| apirate = 100.0 │ +│ collision = 0 │ +│ cone = 0 │ +│ density = 0.0 │ +│ disableflags = 0 │ +│ enableflags = 0 │ +│ gravity = array([ 0. , 0. , -9.81]) │ +│ impratio = 1.0 │ +│ integrator = 0 │ +│ iterations = 100 │ +│ jacobian = 2 │ +│ magnetic = array([ 0. , -0.5, 0. ]) │ +│ mpr_iterations = 50 │ +│ mpr_tolerance = 1e-06 │ +│ noslip_iterations = 0 │ +│ noslip_tolerance = 1e-06 │ +│ o_margin = 0.0 │ +│ o_solimp = array([9.0e-01, 9.5e-01, 1.0e-03, 5.0e-01, 2.0e+00]) │ +│ o_solref = array([0.02, 1. 
]) │ +│ solver = 2 │ +│ timestep = 0.0025 │ +│ tolerance = 1e-08 │ +│ viscosity = 0.0 │ +│ wind = array([0., 0., 0.]) | +""" + +DEFAULT_CONTEXT = { + "gravity": [0., 0., -9.81], + "masscart": 1.0, # Should be seen as 100% and scaled accordingly + "masspole": 0.1, # Should be seen as 100% and scaled accordingly + "pole_length": 1.0, # Should be seen as 100% and scaled accordingly + "force_magnifier": 10.0, + "timestep": 0.01, # Seconds between updates + "magnetic": [0., -0.5, 0.], + "wind": [0., 0., 0.], +} + +CONTEXT_BOUNDS = { + "gravity": [(0.1, -np.inf, float), (0.1, -np.inf, float), (0.1, -np.inf, float)], # Negative gravity + "masscart": (0.1, 10, float), # Cart mass can be varied by a factor of 10 + "masspole": (0.01, 1, float), # Pole mass can be varied by a factor of 10 + "pole_length": (0.05, 5, float), # Pole length can be varied by a factor of 10 + "force_magnifier": (1, 100, int), # Force magnifier can be varied by a factor of 10 + "timestep": ( + 0.001, + 0.1, + float, + ), # TODO not sure how much it can be varied ...Update interval can be varied by a factor of 10 + "magnetic": [(-np.inf, np.inf, float), (-np.inf, np.inf, float), (-np.inf, np.inf, float)], + "wind": [(-np.inf, np.inf, float), (-np.inf, np.inf, float), (-np.inf, np.inf, float)], +} + + +class CARLDmcCartPoleEnv(CARLEnv): def __init__( self, domain: str = "cartpole", - task: str = "swingup", + task: str = "swingup_context", contexts: Dict[Any, Dict[Any, Any]] = {}, hide_context: bool = False, add_gaussian_noise_to_context: bool = False, @@ -27,10 +84,13 @@ def __init__( ): if not contexts: contexts = {0: DEFAULT_CONTEXT} + self.domain = domain + self.task = task if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, environment_kwargs={"flat_observation": True}) + env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = MujocoToGymWrapper(env) super().__init__( env=env, contexts=contexts, @@ -46,28 +106,14 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) + # TODO check gaussian noise on context features + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values def _update_context(self) -> None: - pass - # self.env.gravity = self.context["gravity"] - # print(self.env.env._physics) - # print(self.env.env.__dict__) - - # "gravity", - # "wind", - # "magnetic", - # "density", - # "viscosity", - # high = np.array( - # [ - # self.env.x_threshold * 2, - # np.finfo(np.float32).max, - # self.env.theta_threshold_radians * 2, - # np.finfo(np.float32).max, - # ], - # dtype=np.float32, - # ) - # low = -high - # print(low) - # print(high) - # self.build_observation_space(low, high, CONTEXT_BOUNDS) + if self.dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, environment_kwargs={"flat_observation": True}) + self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index d3354884..10cbbcb2 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -1,8 +1,10 @@ import warnings from typing import Any, Dict, List, TypeVar, Union, Optional +import inspect import numpy as np from dm_control import suite +from dm_control.utils import containers import gym from gym.envs.classic_control import CartPoleEnv @@ -129,62 +131,46 @@ + 
TIMING_PARAMETERS ) - -DEFAULT_CONTEXT = { - "gravity": -9.81 -} - -CONTEXT_BOUNDS = { - "gravity": (0.1, np.inf, float) -} - - -class CARLDmcEnv(CARLEnv): - def __init__( - self, - env: gym.Env, - contexts: Dict[Any, Dict[Any, Any]] = {}, - hide_context: bool = True, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: str = "no", - default_context: Optional[Dict] = DEFAULT_CONTEXT, - max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py - state_context_features: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, - context_selector_kwargs: Optional[Dict] = None, - ): - # TODO can we have more than 1 env? - env = MujocoToGymWrapper(env) - if not contexts: - contexts = {0: DEFAULT_CONTEXT} - super().__init__( - env=env, - contexts=contexts, - hide_context=hide_context, - add_gaussian_noise_to_context=add_gaussian_noise_to_context, - gaussian_noise_std_percentage=gaussian_noise_std_percentage, - logger=logger, - scale_context_features=scale_context_features, - default_context=default_context, - max_episode_length=max_episode_length, - state_context_features=state_context_features, - dict_observation_space=dict_observation_space, - context_selector=context_selector, - context_selector_kwargs=context_selector_kwargs, - ) - self.whitelist_gaussian_noise = list( - DEFAULT_CONTEXT.keys() - ) # allow to augment all values - -def load_dmc_env(domain_name, task_name, task_kwargs=None, environment_kwargs=None, - visualize_reward=False): - return suite.load( - domain_name=domain_name, - task_name=task_name, - task_kwargs=task_kwargs, - environment_kwargs=environment_kwargs, - visualize_reward=visualize_reward, - ) +# class CARLDmcEnv(CARLEnv): +# def __init__( +# self, +# env: gym.Env, +# contexts: Dict[Any, Dict[Any, Any]] = {}, +# hide_context: bool = True, +# add_gaussian_noise_to_context: bool = False, +# gaussian_noise_std_percentage: float = 0.01, +# logger: Optional[TrialLogger] = None, +# scale_context_features: str = "no", +# default_context: Optional[Dict] = DEFAULT_CONTEXT, +# max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py +# state_context_features: Optional[List[str]] = None, +# dict_observation_space: bool = False, +# context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, +# context_selector_kwargs: Optional[Dict] = None, +# ): +# # TODO can we have more than 1 env? 
+# env = MujocoToGymWrapper(env) +# if not contexts: +# contexts = {0: DEFAULT_CONTEXT} +# super().__init__( +# env=env, +# contexts=contexts, +# hide_context=hide_context, +# add_gaussian_noise_to_context=add_gaussian_noise_to_context, +# gaussian_noise_std_percentage=gaussian_noise_std_percentage, +# logger=logger, +# scale_context_features=scale_context_features, +# default_context=default_context, +# max_episode_length=max_episode_length, +# state_context_features=state_context_features, +# dict_observation_space=dict_observation_space, +# context_selector=context_selector, +# context_selector_kwargs=context_selector_kwargs, +# ) +# self.whitelist_gaussian_noise = list( +# DEFAULT_CONTEXT.keys() +# ) # allow to augment all values +# # print(self.env.env.__dict__) +# # print(self.env.env.task.__dict__) +# # print(self.env.env.physics.__dict__) +# #print(SUITE.Tagged) diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py new file mode 100644 index 00000000..c771b45d --- /dev/null +++ b/carl/envs/dmc/dmc_tasks/cartpole.py @@ -0,0 +1,206 @@ + +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Cartpole domain.""" + +import collections +from unittest.mock import DEFAULT +from urllib.error import ContentTooShortError + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from lxml import etree +import numpy as np +from dm_control import suite + +_DEFAULT_TIME_LIMIT = 10 +SUITE = containers.TaggedTasks() + +DEFAULT_CONTEXT = { + "gravity": -9.81, +} + +def get_model_and_assets(num_poles=1): + """Returns a tuple containing the model XML string and a dict of assets.""" + return _make_model(num_poles), common.ASSETS + +@SUITE.add('benchmarking') +def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Cartpole Swing-Up task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + mjcf = etree.fromstring(xml_string) + pole = mjcf.find("./default/default/geom") + pole.set("mass", str(context["masspole"])) + pole.set("fromto", "0 0 0 0 0 " + str(context["pole_length"])) + cart = mjcf.find("./worldbody/body/geom") + cart.set("mass", str(context["masscart"])) + force = mjcf.find("./actuator/motor") + force.set("gear", str(context["force_magnifier"])) + keys = [] + options = mjcf.findall("./option") + for option in options: + for k, v in option.items(): + keys.append(k) + if k == "gravity": + option.set("gravity", " ".join([str(i) for i in context["gravity"]])) + elif k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "magnetic": + option.set("magnetic", " ".join([str(i) for i in context["magnetic"]])) + elif k == "wind": + option.set("wind", " ".join([str(i) for i in 
context["wind"]])) + if "gravity" not in keys: + mjcf.append(etree.Element("option", gravity=" ".join([str(i) for i in context["gravity"]]))) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "magnetic" not in keys: + mjcf.append(etree.Element("option", magnetic=" ".join([str(i) for i in context["magnetic"]]))) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=" ".join([str(i) for i in context["wind"]]))) + + xml_string = etree.tostring(mjcf, pretty_print=True) + print(xml_string) + + physics = Physics.from_xml_string(xml_string, assets) + task = Balance(swing_up=True, sparse=False, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +def _make_model(n_poles): + """Generates an xml string defining a cart with `n_poles` bodies.""" + xml_string = common.read_model('cartpole.xml') + if n_poles == 1: + return xml_string + mjcf = etree.fromstring(xml_string) + parent = mjcf.find('./worldbody/body/body') # Find first pole. + # Make chain of poles. + for pole_index in range(2, n_poles+1): + child = etree.Element('body', name='pole_{}'.format(pole_index), + pos='0 0 1', childclass='pole') + etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index)) + etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index)) + parent.append(child) + parent = child + # Move plane down. + floor = mjcf.find('./worldbody/geom') + floor.set('pos', '0 0 {}'.format(1 - n_poles - .05)) + # Move cameras back. + cameras = mjcf.findall('./worldbody/camera') + cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles)) + cameras[1].set('pos', '0 {} 2'.format(-2*n_poles)) + return etree.tostring(mjcf, pretty_print=True) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Cartpole domain.""" + + def cart_position(self): + """Returns the position of the cart.""" + return self.named.data.qpos['slider'][0] + + def angular_vel(self): + """Returns the angular velocity of the pole.""" + return self.data.qvel[1:] + + def pole_angle_cosine(self): + """Returns the cosine of the pole angle.""" + return self.named.data.xmat[2:, 'zz'] + + def bounded_position(self): + """Returns the state, with pole angle split into sin/cos.""" + return np.hstack((self.cart_position(), + self.named.data.xmat[2:, ['zz', 'xz']].ravel())) + + +class Balance(base.Task): + """A Cartpole `Task` to balance the pole. + State is initialized either close to the target configuration or at a random + configuration. + """ + _CART_RANGE = (-.25, .25) + _ANGLE_COSINE_RANGE = (.995, 1) + + def __init__(self, swing_up, sparse, random=None): + """Initializes an instance of `Balance`. + Args: + swing_up: A `bool`, which if `True` sets the cart to the middle of the + slider and the pole pointing towards the ground. Otherwise, sets the + cart to a random position on the slider and the pole to a random + near-vertical position. + sparse: A `bool`, whether to return a sparse or a smooth reward. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._sparse = sparse + self._swing_up = swing_up + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. 
+ Initializes the cart and pole according to `swing_up`, and in both cases + adds a small random initial velocity to break symmetry. + Args: + physics: An instance of `Physics`. + """ + nv = physics.model.nv + if self._swing_up: + physics.named.data.qpos['slider'] = .01*self.random.randn() + physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn() + physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2) + else: + physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1) + physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1) + physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of the (bounded) physics state.""" + obs = collections.OrderedDict() + obs['position'] = physics.bounded_position() + obs['velocity'] = physics.velocity() + return obs + + def _get_reward(self, physics, sparse): + if sparse: + cart_in_bounds = rewards.tolerance(physics.cart_position(), + self._CART_RANGE) + angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(), + self._ANGLE_COSINE_RANGE).prod() + return cart_in_bounds * angle_in_bounds + else: + upright = (physics.pole_angle_cosine() + 1) / 2 + centered = rewards.tolerance(physics.cart_position(), margin=2) + centered = (1 + centered) / 2 + small_control = rewards.tolerance(physics.control(), margin=1, + value_at_margin=0, + sigmoid='quadratic')[0] + small_control = (4 + small_control) / 5 + small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min() + small_velocity = (1 + small_velocity) / 2 + return upright.mean() * small_control * small_velocity * centered + + def get_reward(self, physics): + """Returns a sparse or a smooth reward, as specified in the constructor.""" + return self._get_reward(physics, sparse=self._sparse) \ No newline at end of file diff --git a/carl/envs/dmc/utils.py b/carl/envs/dmc/utils.py new file mode 100644 index 00000000..95f36fd1 --- /dev/null +++ b/carl/envs/dmc/utils.py @@ -0,0 +1,35 @@ +import inspect + +from dm_control import suite + +from carl.envs.dmc.dmc_tasks import cartpole + +_DOMAINS = {name: module for name, module in locals().items() + if inspect.ismodule(module) and hasattr(module, 'SUITE')} + +def load_dmc_env(domain_name, task_name, context={}, task_kwargs=None, environment_kwargs=None, + visualize_reward=False): + + if domain_name not in _DOMAINS and domain_name not in suite._DOMAINS: + raise ValueError('Domain {!r} does not exist.'.format(domain_name)) + + domain = _DOMAINS[domain_name] + + if task_name in domain.SUITE: + task_kwargs = task_kwargs or {} + if environment_kwargs is not None: + task_kwargs = dict(task_kwargs, environment_kwargs=environment_kwargs) + env = domain.SUITE[task_name](context=context, **task_kwargs) + env.task.visualize_reward = visualize_reward + return env + elif (domain_name, task_name) in suite.ALL_TASKS: + return suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + environment_kwargs=environment_kwargs, + visualize_reward=visualize_reward, + ) + else: + raise ValueError('Level {!r} does not exist in domain {!r}.'.format( + task_name, domain_name)) diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index b3ec2c0e..f6ddcc35 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -1,6 +1,6 @@ from typing import Tuple, Optional, Union, TypeVar -import dm_control.rl +from dm_control.rl.control import Environment import gym import numpy 
as np from dm_env import StepType @@ -10,7 +10,7 @@ ActType = TypeVar("ActType") class MujocoToGymWrapper(gym.Env): - def __init__(self, env: dm_control.rl.control.Environment): + def __init__(self, env: Environment): # TODO set seeds self.env = env From 268843df05f6d4a982723120f9af54cf1a59bd5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Thu, 9 Jun 2022 15:22:17 +0200 Subject: [PATCH 07/37] remove print --- carl/envs/dmc/dmc_tasks/cartpole.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py index c771b45d..a6b642ea 100644 --- a/carl/envs/dmc/dmc_tasks/cartpole.py +++ b/carl/envs/dmc/dmc_tasks/cartpole.py @@ -76,9 +76,7 @@ def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, mjcf.append(etree.Element("option", magnetic=" ".join([str(i) for i in context["magnetic"]]))) if "wind" not in keys: mjcf.append(etree.Element("option", wind=" ".join([str(i) for i in context["wind"]]))) - xml_string = etree.tostring(mjcf, pretty_print=True) - print(xml_string) physics = Physics.from_xml_string(xml_string, assets) task = Balance(swing_up=True, sparse=False, random=random) From 3a9ce8ee7f8d628642d754bb0af24b8d7b15164d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Thu, 9 Jun 2022 16:33:24 +0200 Subject: [PATCH 08/37] update cartpole context --- carl/envs/dmc/carl_dm_cartpole.py | 30 ++++++++++++++++++----------- carl/envs/dmc/dmc_tasks/cartpole.py | 15 +++++++++------ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py index e930f309..5a39b747 100644 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -38,29 +38,37 @@ """ DEFAULT_CONTEXT = { - "gravity": [0., 0., -9.81], + "gravity_x": 0., + "gravity_y": 0., + "gravity_z": -9.81, "masscart": 1.0, # Should be seen as 100% and scaled accordingly "masspole": 0.1, # Should be seen as 100% and scaled accordingly "pole_length": 1.0, # Should be seen as 100% and scaled accordingly "force_magnifier": 10.0, "timestep": 0.01, # Seconds between updates - "magnetic": [0., -0.5, 0.], - "wind": [0., 0., 0.], + "magnetic_x": 0., # TODO decide if this is useful + "magnetic_y": -0.5, + "magnetic_z": 0., + "wind_x": 0., # TODO decide if this is useful + "wind_y": 0., + "wind_z": 0., } CONTEXT_BOUNDS = { - "gravity": [(0.1, -np.inf, float), (0.1, -np.inf, float), (0.1, -np.inf, float)], # Negative gravity + "gravity_x": (-0.1, -np.inf, float), + "gravity_y": (-0.1, -np.inf, float), + "gravity_z": (-0.1, -np.inf, float), # Negative gravity "masscart": (0.1, 10, float), # Cart mass can be varied by a factor of 10 "masspole": (0.01, 1, float), # Pole mass can be varied by a factor of 10 "pole_length": (0.05, 5, float), # Pole length can be varied by a factor of 10 "force_magnifier": (1, 100, int), # Force magnifier can be varied by a factor of 10 - "timestep": ( - 0.001, - 0.1, - float, - ), # TODO not sure how much it can be varied ...Update interval can be varied by a factor of 10 - "magnetic": [(-np.inf, np.inf, float), (-np.inf, np.inf, float), (-np.inf, np.inf, float)], - "wind": [(-np.inf, np.inf, float), (-np.inf, np.inf, float), (-np.inf, np.inf, float)], + "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied + "magnetic_x": (-np.inf, np.inf, float), + "magnetic_y": (-np.inf, np.inf, float), + "magnetic_z": (-np.inf, np.inf, float), + "wind_x": (-np.inf, np.inf, float), + 
"wind_y": (-np.inf, np.inf, float), + "wind_z": (-np.inf, np.inf, float), } diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py index a6b642ea..40fa4fdd 100644 --- a/carl/envs/dmc/dmc_tasks/cartpole.py +++ b/carl/envs/dmc/dmc_tasks/cartpole.py @@ -57,25 +57,28 @@ def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, force.set("gear", str(context["force_magnifier"])) keys = [] options = mjcf.findall("./option") + gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) + magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) for option in options: for k, v in option.items(): keys.append(k) if k == "gravity": - option.set("gravity", " ".join([str(i) for i in context["gravity"]])) + option.set("gravity", gravity) elif k == "timestep": option.set("timestep", str(context["timestep"])) elif k == "magnetic": - option.set("magnetic", " ".join([str(i) for i in context["magnetic"]])) + option.set("magnetic", magnetic) elif k == "wind": - option.set("wind", " ".join([str(i) for i in context["wind"]])) + option.set("wind", wind) if "gravity" not in keys: - mjcf.append(etree.Element("option", gravity=" ".join([str(i) for i in context["gravity"]]))) + mjcf.append(etree.Element("option", gravity=gravity)) if "timestep" not in keys: mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=" ".join([str(i) for i in context["magnetic"]]))) + mjcf.append(etree.Element("option", magnetic=magnetic)) if "wind" not in keys: - mjcf.append(etree.Element("option", wind=" ".join([str(i) for i in context["wind"]]))) + mjcf.append(etree.Element("option", wind=wind)) xml_string = etree.tostring(mjcf, pretty_print=True) physics = Physics.from_xml_string(xml_string, assets) From cf756bdc7b238c298a5961bb3fdf2b25295985a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 10 Jun 2022 10:51:45 +0200 Subject: [PATCH 09/37] general dmc env --- carl/envs/dmc/carl_dm_cartpole.py | 39 +------------- carl/envs/dmc/carl_dmcontrol.py | 85 +++++++++++++++---------------- 2 files changed, 44 insertions(+), 80 deletions(-) diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py index 5a39b747..80ab2e4e 100644 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -6,37 +6,9 @@ from carl.context.selection import AbstractSelector from carl.envs.dmc.wrappers import MujocoToGymWrapper from carl.envs.dmc.utils import load_dmc_env -from carl.envs.carl_env import CARLEnv +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv -""" -Physics options (defaults for CartPole): -| apirate = 100.0 │ -│ collision = 0 │ -│ cone = 0 │ -│ density = 0.0 │ -│ disableflags = 0 │ -│ enableflags = 0 │ -│ gravity = array([ 0. , 0. , -9.81]) │ -│ impratio = 1.0 │ -│ integrator = 0 │ -│ iterations = 100 │ -│ jacobian = 2 │ -│ magnetic = array([ 0. , -0.5, 0. ]) │ -│ mpr_iterations = 50 │ -│ mpr_tolerance = 1e-06 │ -│ noslip_iterations = 0 │ -│ noslip_tolerance = 1e-06 │ -│ o_margin = 0.0 │ -│ o_solimp = array([9.0e-01, 9.5e-01, 1.0e-03, 5.0e-01, 2.0e+00]) │ -│ o_solref = array([0.02, 1. 
]) │ -│ solver = 2 │ -│ timestep = 0.0025 │ -│ tolerance = 1e-08 │ -│ viscosity = 0.0 │ -│ wind = array([0., 0., 0.]) | -""" - DEFAULT_CONTEXT = { "gravity_x": 0., "gravity_y": 0., @@ -72,7 +44,7 @@ } -class CARLDmcCartPoleEnv(CARLEnv): +class CARLDmcCartPoleEnv(CARLDmcEnv): def __init__( self, domain: str = "cartpole", @@ -118,10 +90,3 @@ def __init__( self.whitelist_gaussian_noise = list( DEFAULT_CONTEXT.keys() ) # allow to augment all values - - def _update_context(self) -> None: - if self.dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, environment_kwargs={"flat_observation": True}) - self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index 10cbbcb2..76a1f974 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -11,6 +11,7 @@ from carl.envs.carl_env import CARLEnv from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.envs.dmc.utils import load_dmc_env from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector @@ -131,46 +132,44 @@ + TIMING_PARAMETERS ) -# class CARLDmcEnv(CARLEnv): -# def __init__( -# self, -# env: gym.Env, -# contexts: Dict[Any, Dict[Any, Any]] = {}, -# hide_context: bool = True, -# add_gaussian_noise_to_context: bool = False, -# gaussian_noise_std_percentage: float = 0.01, -# logger: Optional[TrialLogger] = None, -# scale_context_features: str = "no", -# default_context: Optional[Dict] = DEFAULT_CONTEXT, -# max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py -# state_context_features: Optional[List[str]] = None, -# dict_observation_space: bool = False, -# context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, -# context_selector_kwargs: Optional[Dict] = None, -# ): -# # TODO can we have more than 1 env? -# env = MujocoToGymWrapper(env) -# if not contexts: -# contexts = {0: DEFAULT_CONTEXT} -# super().__init__( -# env=env, -# contexts=contexts, -# hide_context=hide_context, -# add_gaussian_noise_to_context=add_gaussian_noise_to_context, -# gaussian_noise_std_percentage=gaussian_noise_std_percentage, -# logger=logger, -# scale_context_features=scale_context_features, -# default_context=default_context, -# max_episode_length=max_episode_length, -# state_context_features=state_context_features, -# dict_observation_space=dict_observation_space, -# context_selector=context_selector, -# context_selector_kwargs=context_selector_kwargs, -# ) -# self.whitelist_gaussian_noise = list( -# DEFAULT_CONTEXT.keys() -# ) # allow to augment all values -# # print(self.env.env.__dict__) -# # print(self.env.env.task.__dict__) -# # print(self.env.env.physics.__dict__) -# #print(SUITE.Tagged) +class CARLDmcEnv(CARLEnv): + def __init__( + self, + env: gym.Env, + contexts: Dict[Any, Dict[Any, Any]], + hide_context: bool, + add_gaussian_noise_to_context: bool, + gaussian_noise_std_percentage: float, + logger: Optional[TrialLogger], + scale_context_features: str, + default_context: Optional[Dict], + max_episode_length: int, + state_context_features: Optional[List[str]], + dict_observation_space: bool, + context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]], + context_selector_kwargs: Optional[Dict], + ): + # TODO can we have more than 1 env? 
+ # env = MujocoToGymWrapper(env) + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + + def _update_context(self) -> None: + if self.dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, environment_kwargs={"flat_observation": True}) + self.env = MujocoToGymWrapper(env) From b06ded733e7f0b776d2419d74a6b47d4a530bb55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 11:37:40 +0200 Subject: [PATCH 10/37] add walker --- carl/envs/dmc/__init__.py | 5 + carl/envs/dmc/carl_dm_walker.py | 95 ++++++++++++++ carl/envs/dmc/dmc_tasks/cartpole.py | 5 +- carl/envs/dmc/dmc_tasks/walker.py | 192 ++++++++++++++++++++++++++++ 4 files changed, 293 insertions(+), 4 deletions(-) create mode 100644 carl/envs/dmc/carl_dm_walker.py create mode 100644 carl/envs/dmc/dmc_tasks/walker.py diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 59b761f5..238f1daf 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -3,3 +3,8 @@ DEFAULT_CONTEXT as CARLDmcCartPoleEnv_defaults, ) from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv + +from carl.envs.dmc.carl_dm_walker import ( + DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults, +) +from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py new file mode 100644 index 00000000..ea51d24c --- /dev/null +++ b/carl/envs/dmc/carl_dm_walker.py @@ -0,0 +1,95 @@ +from typing import Any, Dict, List, Optional, Union + +import numpy as np + +from carl.utils.trial_logger import TrialLogger +from carl.context.selection import AbstractSelector +from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv + + +DEFAULT_CONTEXT = { + "joint_stiffness": 5000, + "gravity_x": 0., + "gravity_y": 0., + "gravity_z": -9.81, + "friction": 0.6, + "angular_damping": -0.05, + "actuator_strength": 1, # use this as scaling factor + "joint_angular_damping": 35, + "torso_mass": 10, + "timestep": 0.0025, # Seconds between updates + "magnetic_x": 0., # TODO decide if this is useful + "magnetic_y": -0.5, + "magnetic_z": 0., + "wind_x": 0., # TODO decide if this is useful + "wind_y": 0., + "wind_z": 0., +} + +CONTEXT_BOUNDS = { + "joint_stiffness": (1, np.inf, float), + "gravity_x": (-0.1, -np.inf, float), + "gravity_y": (-0.1, -np.inf, float), + "gravity_z": (-0.1, -np.inf, float), + "friction": (-np.inf, np.inf, float), + "angular_damping": (-np.inf, np.inf, float), + "actuator_strength": (-np.inf, np.inf, float), + "joint_angular_damping": (0, np.inf, float), + "torso_mass": (0.1, np.inf, float), + "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied + "magnetic_x": (-np.inf, np.inf, float), + "magnetic_y": (-np.inf, np.inf, float), + "magnetic_z": (-np.inf, np.inf, float), + "wind_x": (-np.inf, np.inf, float), + "wind_y": (-np.inf, np.inf, float), + "wind_z": 
(-np.inf, np.inf, float), +} + +class CARLDmcWalkerEnv(CARLDmcEnv): + def __init__( + self, + domain: str = "walker", + task: str = "stand_context", + contexts: Dict[Any, Dict[Any, Any]] = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + self.domain = domain + self.task = task + if dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = MujocoToGymWrapper(env) + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + # TODO check gaussian noise on context features + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py index 40fa4fdd..834c43e4 100644 --- a/carl/envs/dmc/dmc_tasks/cartpole.py +++ b/carl/envs/dmc/dmc_tasks/cartpole.py @@ -28,14 +28,11 @@ from dm_control.utils import rewards from lxml import etree import numpy as np -from dm_control import suite + _DEFAULT_TIME_LIMIT = 10 SUITE = containers.TaggedTasks() -DEFAULT_CONTEXT = { - "gravity": -9.81, -} def get_model_and_assets(num_poles=1): """Returns a tuple containing the model XML string and a dict of assets.""" diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py new file mode 100644 index 00000000..abc58dc0 --- /dev/null +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -0,0 +1,192 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Planar Walker Domain.""" + +import collections +from multiprocessing.context import _force_start_method + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import common +from dm_control.suite.utils import randomizers +from dm_control.utils import containers +from dm_control.utils import rewards +from lxml import etree + + +_DEFAULT_TIME_LIMIT = 25 +_CONTROL_TIMESTEP = .025 + +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 1.2 + +# Horizontal speeds (meters/second) above which move reward is 1. +_WALK_SPEED = 1 +_RUN_SPEED = 8 + + +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('walker.xml'), common.ASSETS + + +@SUITE.add('benchmarking') +def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Stand task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + mjcf = etree.fromstring(xml_string) + # pole = mjcf.find("./default/default/geom") + # pole.set("mass", str(context["masspole"])) + # pole.set("fromto", "0 0 0 0 0 " + str(context["pole_length"])) + # cart = mjcf.find("./worldbody/body/geom") + # cart.set("mass", str(context["masscart"])) + actuators = mjcf.findall("./actuator/motor") + for actuator in actuators: + gear = actuator.get("gear") + actuator.set("gear", str(gear * context["actuator_strength"])) + keys = [] + options = mjcf.findall("./option") + gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) + magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) + for option in options: + for k, v in option.items(): + keys.append(k) + if k == "gravity": + option.set("gravity", gravity) + elif k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "magnetic": + option.set("magnetic", magnetic) + elif k == "wind": + option.set("wind", wind) + if "gravity" not in keys: + mjcf.append(etree.Element("option", gravity=gravity)) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "magnetic" not in keys: + mjcf.append(etree.Element("option", magnetic=magnetic)) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=wind)) + xml_string = etree.tostring(mjcf, pretty_print=True) + print(xml_string) + + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + + +# @SUITE.add('benchmarking') +# def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +# """Returns the Walk task.""" +# physics = Physics.from_xml_string(*get_model_and_assets()) +# task = PlanarWalker(move_speed=_WALK_SPEED, random=random) +# environment_kwargs = environment_kwargs or {} +# return control.Environment( +# physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, +# **environment_kwargs) + + +# @SUITE.add('benchmarking') +# def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, 
environment_kwargs=None): +# """Returns the Run task.""" +# physics = Physics.from_xml_string(*get_model_and_assets()) +# task = PlanarWalker(move_speed=_RUN_SPEED, random=random) +# environment_kwargs = environment_kwargs or {} +# return control.Environment( +# physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, +# **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Walker domain.""" + + def torso_upright(self): + """Returns projection from z-axes of torso to the z-axes of world.""" + return self.named.data.xmat['torso', 'zz'] + + def torso_height(self): + """Returns the height of the torso.""" + return self.named.data.xpos['torso', 'z'] + + def horizontal_velocity(self): + """Returns the horizontal velocity of the center-of-mass.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] + + def orientations(self): + """Returns planar orientations of all bodies.""" + return self.named.data.xmat[1:, ['xx', 'xz']].ravel() + + +class PlanarWalker(base.Task): + """A planar walker task.""" + + def __init__(self, move_speed, random=None): + """Initializes an instance of `PlanarWalker`. + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + In 'standing' mode, use initial orientation and small velocities. + In 'random' mode, randomize joint angles and let fall to the floor. + Args: + physics: An instance of `Physics`. 
+ """ + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of body orientations, height and velocites.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['height'] = physics.torso_height() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.torso_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/2) + upright = (1 + physics.torso_upright()) / 2 + stand_reward = (3*standing + upright) / 4 + if self._move_speed == 0: + return stand_reward + else: + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed/2, + value_at_margin=0.5, + sigmoid='linear') + return stand_reward * (5*move_reward + 1) / 6 From bf21bb6c3cca14685c4c5e268d72cac98b475bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 11:37:52 +0200 Subject: [PATCH 11/37] fix load dmc --- carl/envs/dmc/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/carl/envs/dmc/utils.py b/carl/envs/dmc/utils.py index 95f36fd1..0c2821d7 100644 --- a/carl/envs/dmc/utils.py +++ b/carl/envs/dmc/utils.py @@ -2,7 +2,7 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import cartpole +from carl.envs.dmc.dmc_tasks import cartpole, walker _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} @@ -10,11 +10,13 @@ def load_dmc_env(domain_name, task_name, context={}, task_kwargs=None, environment_kwargs=None, visualize_reward=False): - if domain_name not in _DOMAINS and domain_name not in suite._DOMAINS: + if domain_name in _DOMAINS: + domain = _DOMAINS[domain_name] + elif domain_name in suite._DOMAINS: + domain = suite._DOMAINS[domain_name] + else: raise ValueError('Domain {!r} does not exist.'.format(domain_name)) - domain = _DOMAINS[domain_name] - if task_name in domain.SUITE: task_kwargs = task_kwargs or {} if environment_kwargs is not None: From f3aac23240c4cda4d90e0db4932d721052bbeb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 12:27:25 +0200 Subject: [PATCH 12/37] update walker context --- carl/envs/dmc/carl_dm_walker.py | 20 +++++++++++--------- carl/envs/dmc/dmc_tasks/walker.py | 17 ++++++++++------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index ea51d24c..08cbef8d 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -14,11 +14,12 @@ "gravity_x": 0., "gravity_y": 0., "gravity_z": -9.81, - "friction": 0.6, - "angular_damping": -0.05, - "actuator_strength": 1, # use this as scaling factor - "joint_angular_damping": 35, - "torso_mass": 10, + "friction_tangential": 0.7, + "friction_torsional": 0.1, + "friction_rolling": 0.1, + "actuator_strength": 1, # scale all actuators by this factor + "joint_damping": 0.1, + # "torso_mass": 10, # TODO find out if mass can be modified "timestep": 0.0025, # Seconds between updates "magnetic_x": 0., # TODO decide if this is useful "magnetic_y": -0.5, @@ -33,11 +34,12 @@ "gravity_x": (-0.1, -np.inf, float), "gravity_y": (-0.1, -np.inf, float), "gravity_z": (-0.1, -np.inf, float), - "friction": (-np.inf, np.inf, float), - 
"angular_damping": (-np.inf, np.inf, float), + "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? + "friction_torsional": (-np.inf, np.inf, float), + "friction_rolling": (-np.inf, np.inf, float), "actuator_strength": (-np.inf, np.inf, float), - "joint_angular_damping": (0, np.inf, float), - "torso_mass": (0.1, np.inf, float), + "joint_damping": (0, np.inf, float), + # "torso_mass": (0.1, np.inf, float), "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied "magnetic_x": (-np.inf, np.inf, float), "magnetic_y": (-np.inf, np.inf, float), diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index abc58dc0..08d20cee 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -53,15 +53,18 @@ def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, envir xml_string, assets = get_model_and_assets() if context != {}: mjcf = etree.fromstring(xml_string) - # pole = mjcf.find("./default/default/geom") - # pole.set("mass", str(context["masspole"])) - # pole.set("fromto", "0 0 0 0 0 " + str(context["pole_length"])) - # cart = mjcf.find("./worldbody/body/geom") - # cart.set("mass", str(context["masscart"])) + damping = mjcf.find("./default/joint") + damping.set("damping", str(context["joint_damping"])) + friction = mjcf.find("./default/geom") + friction.set("friction", " ".join([ + str(context["friction_tangential"]), + str(context["friction_torsional"]), + str(context["friction_rolling"])]) + ) actuators = mjcf.findall("./actuator/motor") for actuator in actuators: gear = actuator.get("gear") - actuator.set("gear", str(gear * context["actuator_strength"])) + actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) keys = [] options = mjcf.findall("./option") gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) @@ -87,7 +90,7 @@ def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, envir if "wind" not in keys: mjcf.append(etree.Element("option", wind=wind)) xml_string = etree.tostring(mjcf, pretty_print=True) - print(xml_string) + # print(xml_string.decode("utf-8")) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=0, random=random) From d71b2d1d10b5ae285a83da9e6de6f050854562dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 13:33:54 +0200 Subject: [PATCH 13/37] camera id in render --- carl/envs/dmc/wrappers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index f6ddcc35..84d4a7be 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -76,7 +76,7 @@ def reset( raise NotImplementedError return observation - def render(self, mode="human"): + def render(self, mode="human", camera_id=0): """Renders the environment. The set of supported modes varies per environment. 
(And some @@ -118,6 +118,6 @@ def render(self, mode='human'): if mode == "human": raise NotImplementedError elif mode == "rgb_array": - return self.env.physics.render(camera_id=1) + return self.env.physics.render(camera_id=camera_id) else: raise NotImplementedError From cf977e4dbfb2c3f4014190a0d31ba74b2c83efd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 13:35:14 +0200 Subject: [PATCH 14/37] dmc test function --- test_dm_control.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test_dm_control.py diff --git a/test_dm_control.py b/test_dm_control.py new file mode 100644 index 00000000..565783f7 --- /dev/null +++ b/test_dm_control.py @@ -0,0 +1,35 @@ +from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv +from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv +from carl.envs.classic_control import CARLCartPoleEnv +from carl.envs import CARLDmcCartPoleEnv_defaults as cartpole_default +from carl.envs import CARLDmcWalkerEnv_defaults as walker_default +import matplotlib.pyplot as plt + +if __name__ == "__main__": + # Load one task: + + longer_pole = cartpole_default.copy() + longer_pole["pole_length"] = cartpole_default["pole_length"]*2 + contexts = {0: longer_pole} + + walker_default["actuator_strength"] = walker_default["actuator_strength"]*2 + contexts = {0: walker_default} + # env = load_dmc_env("cartpole", "swingup") + carl_env = CARLDmcWalkerEnv(contexts=contexts, hide_context=False) + render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) + s = carl_env.reset() + render() + plt.savefig("asdf_dm.png") + action = carl_env.action_space.sample() + state, reward, done, info = carl_env.step(action=action) + print("state", state, type(state)) + + s = carl_env.reset() + done = False + i = 0 + while not done: + action = carl_env.action_space.sample() + state, reward, done, info = carl_env.step(action=action) + print(state, action, reward, done) + i += 1 + print(i) From c74aece13d7f4e9f1210cacf509c8e16a1a3cc89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 15:59:52 +0200 Subject: [PATCH 15/37] add more context tasks to walker --- carl/envs/dmc/dmc_tasks/walker.py | 128 ++++++++++++++++-------------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index 08d20cee..e4763089 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -47,51 +47,54 @@ def get_model_and_assets(): return common.read_model('walker.xml'), common.ASSETS +def adapt_context(xml_string, context): + """Adapts and returns the xml_string of the model with the given context.""" + mjcf = etree.fromstring(xml_string) + damping = mjcf.find("./default/joint") + damping.set("damping", str(context["joint_damping"])) + friction = mjcf.find("./default/geom") + friction.set("friction", " ".join([ + str(context["friction_tangential"]), + str(context["friction_torsional"]), + str(context["friction_rolling"])]) + ) + actuators = mjcf.findall("./actuator/motor") + for actuator in actuators: + gear = actuator.get("gear") + actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) + keys = [] + options = mjcf.findall("./option") + gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) + magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + wind = " 
".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) + for option in options: + for k, v in option.items(): + keys.append(k) + if k == "gravity": + option.set("gravity", gravity) + elif k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "magnetic": + option.set("magnetic", magnetic) + elif k == "wind": + option.set("wind", wind) + if "gravity" not in keys: + mjcf.append(etree.Element("option", gravity=gravity)) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "magnetic" not in keys: + mjcf.append(etree.Element("option", magnetic=magnetic)) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=wind)) + xml_string = etree.tostring(mjcf, pretty_print=True) + return xml_string + @SUITE.add('benchmarking') def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Stand task.""" + """Returns the Stand task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - mjcf = etree.fromstring(xml_string) - damping = mjcf.find("./default/joint") - damping.set("damping", str(context["joint_damping"])) - friction = mjcf.find("./default/geom") - friction.set("friction", " ".join([ - str(context["friction_tangential"]), - str(context["friction_torsional"]), - str(context["friction_rolling"])]) - ) - actuators = mjcf.findall("./actuator/motor") - for actuator in actuators: - gear = actuator.get("gear") - actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") - gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) - magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, v in option.items(): - keys.append(k) - if k == "gravity": - option.set("gravity", gravity) - elif k == "timestep": - option.set("timestep", str(context["timestep"])) - elif k == "magnetic": - option.set("magnetic", magnetic) - elif k == "wind": - option.set("wind", wind) - if "gravity" not in keys: - mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=magnetic)) - if "wind" not in keys: - mjcf.append(etree.Element("option", wind=wind)) - xml_string = etree.tostring(mjcf, pretty_print=True) - # print(xml_string.decode("utf-8")) - + xml_string = adapt_context(xml_string, context) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=0, random=random) environment_kwargs = environment_kwargs or {} @@ -100,27 +103,32 @@ def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, envir **environment_kwargs) - -# @SUITE.add('benchmarking') -# def walk(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): -# """Returns the Walk task.""" -# physics = Physics.from_xml_string(*get_model_and_assets()) -# task = PlanarWalker(move_speed=_WALK_SPEED, random=random) -# environment_kwargs = environment_kwargs or {} -# return control.Environment( -# physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, -# **environment_kwargs) +@SUITE.add('benchmarking') +def walk_context(context={}, 
time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task with the adapted context.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) -# @SUITE.add('benchmarking') -# def run(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): -# """Returns the Run task.""" -# physics = Physics.from_xml_string(*get_model_and_assets()) -# task = PlanarWalker(move_speed=_RUN_SPEED, random=random) -# environment_kwargs = environment_kwargs or {} -# return control.Environment( -# physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, -# **environment_kwargs) +@SUITE.add('benchmarking') +def run_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task with the adapted context.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) class Physics(mujoco.Physics): From 1d130d79036f0884093bd9dce6feeb3a71261198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 16:25:32 +0200 Subject: [PATCH 16/37] add quadruped --- carl/envs/dmc/__init__.py | 5 + carl/envs/dmc/carl_dm_quadruped.py | 97 +++++ carl/envs/dmc/carl_dm_walker.py | 4 +- carl/envs/dmc/dmc_tasks/quadruped.py | 517 +++++++++++++++++++++++++++ carl/envs/dmc/dmc_tasks/walker.py | 1 + carl/envs/dmc/utils.py | 2 +- 6 files changed, 623 insertions(+), 3 deletions(-) create mode 100644 carl/envs/dmc/carl_dm_quadruped.py create mode 100644 carl/envs/dmc/dmc_tasks/quadruped.py diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 238f1daf..3354fbee 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -8,3 +8,8 @@ DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults, ) from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv + +from carl.envs.dmc.carl_dm_quadruped import ( + DEFAULT_CONTEXT as CARLDmcQuadrupedEnv_defaults, +) +from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py new file mode 100644 index 00000000..72de2aff --- /dev/null +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -0,0 +1,97 @@ +from typing import Any, Dict, List, Optional, Union + +import numpy as np + +from carl.utils.trial_logger import TrialLogger +from carl.context.selection import AbstractSelector +from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv + + +DEFAULT_CONTEXT = { + "joint_stiffness": 5000, + "gravity_x": 0., + "gravity_y": 0., + "gravity_z": -9.81, + "friction_tangential": 0.7, + "friction_torsional": 0.1, + "friction_rolling": 0.1, + "actuator_strength": 1, # scale all actuators by this factor + "joint_damping": 0.1, + # "torso_mass": 10, # TODO find out if mass can be 
modified + "timestep": 0.005, # Seconds between updates + "magnetic_x": 0., # TODO decide if this is useful + "magnetic_y": -0.5, + "magnetic_z": 0., + "wind_x": 0., # TODO decide if this is useful + "wind_y": 0., + "wind_z": 0., +} + +CONTEXT_BOUNDS = { + "joint_stiffness": (1, np.inf, float), + "gravity_x": (-0.1, -np.inf, float), + "gravity_y": (-0.1, -np.inf, float), + "gravity_z": (-0.1, -np.inf, float), + "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? + "friction_torsional": (-np.inf, np.inf, float), + "friction_rolling": (-np.inf, np.inf, float), + "actuator_strength": (-np.inf, np.inf, float), + "joint_damping": (0, np.inf, float), + # "torso_mass": (0.1, np.inf, float), + "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied + "magnetic_x": (-np.inf, np.inf, float), + "magnetic_y": (-np.inf, np.inf, float), + "magnetic_z": (-np.inf, np.inf, float), + "wind_x": (-np.inf, np.inf, float), + "wind_y": (-np.inf, np.inf, float), + "wind_z": (-np.inf, np.inf, float), +} + +class CARLDmcQuadrupedEnv(CARLDmcEnv): + def __init__( + self, + domain: str = "quadruped", + task: str = "walk_context", + contexts: Dict[Any, Dict[Any, Any]] = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + self.domain = domain + self.task = task + if dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = MujocoToGymWrapper(env) + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + # TODO check gaussian noise on context features + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 08cbef8d..1355ce42 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -10,7 +10,7 @@ DEFAULT_CONTEXT = { - "joint_stiffness": 5000, + # "joint_stiffness": 5000, # TODO where to find this "gravity_x": 0., "gravity_y": 0., "gravity_z": -9.81, @@ -30,7 +30,7 @@ } CONTEXT_BOUNDS = { - "joint_stiffness": (1, np.inf, float), + # "joint_stiffness": (1, np.inf, float), "gravity_x": (-0.1, -np.inf, float), "gravity_y": (-0.1, -np.inf, float), "gravity_z": (-0.1, -np.inf, float), diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py new file mode 100644 index 00000000..ee276dfe --- /dev/null 
+++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -0,0 +1,517 @@ +# Copyright 2019 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Quadruped Domain.""" + +import collections + +from dm_control import mujoco +from dm_control.mujoco.wrapper import mjbindings +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +from dm_control.utils import xml_tools +from lxml import etree +import numpy as np +from scipy import ndimage + +enums = mjbindings.enums +mjlib = mjbindings.mjlib + + +_DEFAULT_TIME_LIMIT = 20 +_CONTROL_TIMESTEP = .02 + +# Horizontal speeds above which the move reward is 1. +_RUN_SPEED = 5 +_WALK_SPEED = 0.5 + +# Constants related to terrain generation. +_HEIGHTFIELD_ID = 0 +_TERRAIN_SMOOTHNESS = 0.15 # 0.0: maximally bumpy; 1.0: completely smooth. +_TERRAIN_BUMP_SCALE = 2 # Spatial scale of terrain bumps (in meters). + +# Named model elements. +_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right'] +_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny'] + +SUITE = containers.TaggedTasks() + + +def make_model(floor_size=None, terrain=False, rangefinders=False, + walls_and_ball=False): + """Returns the model XML string.""" + xml_string = common.read_model('quadruped.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) + + # Set floor size. + if floor_size is not None: + floor_geom = mjcf.find('.//geom[@name=\'floor\']') + floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5' + + # Remove walls, ball and target. + if not walls_and_ball: + for wall in _WALLS: + wall_geom = xml_tools.find_element(mjcf, 'geom', wall) + wall_geom.getparent().remove(wall_geom) + + # Remove ball. + ball_body = xml_tools.find_element(mjcf, 'body', 'ball') + ball_body.getparent().remove(ball_body) + + # Remove target. + target_site = xml_tools.find_element(mjcf, 'site', 'target') + target_site.getparent().remove(target_site) + + # Remove terrain. + if not terrain: + terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain') + terrain_geom.getparent().remove(terrain_geom) + + # Remove rangefinders if they're not used, as range computations can be + # expensive, especially in a scene with heightfields. 
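+ # Note (added for clarity; not part of the upstream dm_control model code):
+ # `mjcf.findall('.//rangefinder')` matches every <rangefinder> sensor element
+ # anywhere in the parsed MJCF tree, and `rf.getparent().remove(rf)` detaches
+ # each match from its parent node, so the compiled model carries no
+ # ray-casting sensors when `rangefinders=False`.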
+ if not rangefinders: + rangefinder_sensors = mjcf.findall('.//rangefinder') + for rf in rangefinder_sensors: + rf.getparent().remove(rf) + + return etree.tostring(mjcf, pretty_print=True) + + +def adapt_context(xml_string, context): + """Adapts and returns the xml_string of the model with the given context.""" + mjcf = etree.fromstring(xml_string) +# damping = mjcf.find("./default/joint") +# damping.set("damping", str(context["joint_damping"])) +# friction = mjcf.find("./default/geom") +# friction.set("friction", " ".join([ +# str(context["friction_tangential"]), +# str(context["friction_torsional"]), +# str(context["friction_rolling"])]) +# ) +# actuators = mjcf.findall("./actuator/motor") +# for actuator in actuators: +# gear = actuator.get("gear") +# actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) + keys = [] + options = mjcf.findall("./option") + gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) + magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) + for option in options: + for k, v in option.items(): + keys.append(k) + if k == "gravity": + option.set("gravity", gravity) + elif k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "magnetic": + option.set("magnetic", magnetic) + elif k == "wind": + option.set("wind", wind) + if "gravity" not in keys: + mjcf.append(etree.Element("option", gravity=gravity)) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "magnetic" not in keys: + mjcf.append(etree.Element("option", magnetic=magnetic)) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=wind)) + xml_string = etree.tostring(mjcf, pretty_print=True) + return xml_string + + +@SUITE.add() +def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk task with the adapted context.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def run_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run task with the adapted context.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def escape_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Escape task with the adapted context.""" + xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = 
Escape(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@SUITE.add() +def fetch_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Fetch task with the adapted context.""" + xml_string = make_model(walls_and_ball=True) + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Fetch(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Quadruped domain.""" + + def _reload_from_data(self, data): + super()._reload_from_data(data) + # Clear cached sensor names when the physics is reloaded. + self._sensor_types_to_names = {} + self._hinge_names = [] + + def _get_sensor_names(self, *sensor_types): + try: + sensor_names = self._sensor_types_to_names[sensor_types] + except KeyError: + [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types)) + sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids] + self._sensor_types_to_names[sensor_types] = sensor_names + return sensor_names + + def torso_upright(self): + """Returns the dot-product of the torso z-axis and the global z-axis.""" + return np.asarray(self.named.data.xmat['torso', 'zz']) + + def torso_velocity(self): + """Returns the velocity of the torso, in the local frame.""" + return self.named.data.sensordata['velocimeter'].copy() + + def egocentric_state(self): + """Returns the state without global orientation or position.""" + if not self._hinge_names: + [hinge_ids] = np.nonzero(self.model.jnt_type == + enums.mjtJoint.mjJNT_HINGE) + self._hinge_names = [self.model.id2name(j_id, 'joint') + for j_id in hinge_ids] + return np.hstack((self.named.data.qpos[self._hinge_names], + self.named.data.qvel[self._hinge_names], + self.data.act)) + + def toe_positions(self): + """Returns toe positions in egocentric frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_toe = self.named.data.xpos[_TOES] - torso_pos + return torso_to_toe.dot(torso_frame) + + def force_torque(self): + """Returns scaled force/torque sensor readings at the toes.""" + force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE, + enums.mjtSensor.mjSENS_TORQUE) + return np.arcsinh(self.named.data.sensordata[force_torque_sensors]) + + def imu(self): + """Returns IMU-like sensor readings.""" + imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO, + enums.mjtSensor.mjSENS_ACCELEROMETER) + return self.named.data.sensordata[imu_sensors] + + def rangefinder(self): + """Returns scaled rangefinder sensor readings.""" + rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER) + rf_readings = self.named.data.sensordata[rf_sensors] + no_intersection = -1.0 + return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings)) + + def origin_distance(self): + """Returns the distance from the origin to the workspace.""" + return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace'])) + + def origin(self): + """Returns origin position in the torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos 
= self.named.data.xpos['torso'] + return -torso_pos.dot(torso_frame) + + def ball_state(self): + """Returns ball position and velocity relative to the torso frame.""" + data = self.named.data + torso_frame = data.xmat['torso'].reshape(3, 3) + ball_rel_pos = data.xpos['ball'] - data.xpos['torso'] + ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3] + ball_rot_vel = data.qvel['ball_root'][3:] + ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel)) + return ball_state.dot(torso_frame).ravel() + + def target_position(self): + """Returns target position in torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_target = self.named.data.site_xpos['target'] - torso_pos + return torso_to_target.dot(torso_frame) + + def ball_to_target_distance(self): + """Returns horizontal distance from the ball to the target.""" + ball_to_target = (self.named.data.site_xpos['target'] - + self.named.data.xpos['ball']) + return np.linalg.norm(ball_to_target[:2]) + + def self_to_ball_distance(self): + """Returns horizontal distance from the quadruped workspace to the ball.""" + self_to_ball = (self.named.data.site_xpos['workspace'] + -self.named.data.xpos['ball']) + return np.linalg.norm(self_to_ball[:2]) + + +def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0): + """Find a height with no contacts given a body orientation. + Args: + physics: An instance of `Physics`. + orientation: A quaternion. + x_pos: A float. Position along global x-axis. + y_pos: A float. Position along global y-axis. + Raises: + RuntimeError: If a non-contacting configuration has not been found after + 10,000 attempts. + """ + z_pos = 0.0 # Start embedded in the floor. + num_contacts = 1 + num_attempts = 0 + # Move up in 1cm increments until no contacts. + while num_contacts > 0: + try: + with physics.reset_context(): + physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos + physics.named.data.qpos['root'][3:] = orientation + except control.PhysicsError: + # We may encounter a PhysicsError here due to filling the contact + # buffer, in which case we simply increment the height and continue. + pass + num_contacts = physics.data.ncon + z_pos += 0.01 + num_attempts += 1 + if num_attempts > 10000: + raise RuntimeError('Failed to find a non-contacting configuration.') + + +def _common_observations(physics): + """Returns the observations common to all tasks.""" + obs = collections.OrderedDict() + obs['egocentric_state'] = physics.egocentric_state() + obs['torso_velocity'] = physics.torso_velocity() + obs['torso_upright'] = physics.torso_upright() + obs['imu'] = physics.imu() + obs['force_torque'] = physics.force_torque() + return obs + + +def _upright_reward(physics, deviation_angle=0): + """Returns a reward proportional to how upright the torso is. + Args: + physics: an instance of `Physics`. + deviation_angle: A float, in degrees. The reward is 0 when the torso is + exactly upside-down and 1 when the torso's z-axis is less than + `deviation_angle` away from the global z-axis. + """ + deviation = np.cos(np.deg2rad(deviation_angle)) + return rewards.tolerance( + physics.torso_upright(), + bounds=(deviation, float('inf')), + sigmoid='linear', + margin=1 + deviation, + value_at_margin=0) + + +class Move(base.Task): + """A quadruped task solved by moving forward at a designated speed.""" + + def __init__(self, desired_speed, random=None): + """Initializes an instance of `Move`. + Args: + desired_speed: A float. 
If this value is zero, reward is given simply + for standing upright. Otherwise this specifies the horizontal velocity + at which the velocity-dependent reward component is maximized. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._desired_speed = desired_speed + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + Args: + physics: An instance of `Physics`. + """ + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + return _common_observations(physics) + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Move reward term. + move_reward = rewards.tolerance( + physics.torso_velocity()[0], + bounds=(self._desired_speed, float('inf')), + margin=self._desired_speed, + value_at_margin=0.5, + sigmoid='linear') + + return _upright_reward(physics) * move_reward + + +class Escape(base.Task): + """A quadruped task solved by escaping a bowl-shaped terrain.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + Args: + physics: An instance of `Physics`. + """ + # Get heightfield resolution, assert that it is square. + res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] + assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] + # Sinusoidal bowl shape. + row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] + radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) + bowl_shape = .5 - np.cos(2*np.pi*radius)/2 + # Random smooth bumps. + terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) + bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res)) + smooth_bumps = ndimage.zoom(bumps, res / float(bump_res)) + # Terrain is elementwise product. + terrain = bowl_shape * smooth_bumps + start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] + physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() + super().initialize_episode(physics) + + # If we have a rendering context, we need to re-upload the modified + # heightfield data. + if physics.contexts: + with physics.contexts.gl.make_current() as ctx: + ctx.call(mjlib.mjr_uploadHField, + physics.model.ptr, + physics.contexts.mujoco.ptr, + _HEIGHTFIELD_ID) + + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['origin'] = physics.origin() + obs['rangefinder'] = physics.rangefinder() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Escape reward term. 
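+ # Added note, assuming the usual dm_control rewards.tolerance semantics: the
+ # term below is 1 once the torso is at least `terrain_size` away from the
+ # origin and ramps down linearly to 0 at the centre of the bowl. For example,
+ # tolerance(3.0, bounds=(10.0, inf), margin=10.0, value_at_margin=0,
+ # sigmoid='linear') evaluates to roughly 0.3.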
+ terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + escape_reward = rewards.tolerance( + physics.origin_distance(), + bounds=(terrain_size, float('inf')), + margin=terrain_size, + value_at_margin=0, + sigmoid='linear') + + return _upright_reward(physics, deviation_angle=20) * escape_reward + + +class Fetch(base.Task): + """A quadruped task solved by bringing a ball to the origin.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + Args: + physics: An instance of `Physics`. + """ + # Initial configuration, random azimuth and horizontal position. + azimuth = self.random.uniform(0, 2*np.pi) + orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2))) + spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0] + x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,)) + _find_non_contacting_height(physics, orientation, x_pos, y_pos) + + # Initial ball state. + physics.named.data.qpos['ball_root'][:2] = self.random.uniform( + -spawn_radius, spawn_radius, size=(2,)) + physics.named.data.qpos['ball_root'][2] = 2 + physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['ball_state'] = physics.ball_state() + obs['target_position'] = physics.target_position() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Reward for moving close to the ball. + arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2) + workspace_radius = physics.named.model.site_size['workspace', 0] + ball_radius = physics.named.model.geom_size['ball', 0] + reach_reward = rewards.tolerance( + physics.self_to_ball_distance(), + bounds=(0, workspace_radius+ball_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + # Reward for bringing the ball to the target. 
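+ # Added note: `fetch_reward` below is 1 while the ball lies within
+ # `target_radius` of the target site and decays linearly over the arena
+ # radius. The combination reach_reward * (0.5 + 0.5*fetch_reward) gives half
+ # credit for merely reaching the ball and full credit only once the ball is
+ # also on the target, with everything scaled by the upright term.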
+ target_radius = physics.named.model.site_size['target', 0] + fetch_reward = rewards.tolerance( + physics.ball_to_target_distance(), + bounds=(0, target_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward) + + return _upright_reward(physics) * reach_then_fetch \ No newline at end of file diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index e4763089..0ab55bc9 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -89,6 +89,7 @@ def adapt_context(xml_string, context): xml_string = etree.tostring(mjcf, pretty_print=True) return xml_string + @SUITE.add('benchmarking') def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Stand task with the adapted context.""" diff --git a/carl/envs/dmc/utils.py b/carl/envs/dmc/utils.py index 0c2821d7..d3e7ae85 100644 --- a/carl/envs/dmc/utils.py +++ b/carl/envs/dmc/utils.py @@ -2,7 +2,7 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import cartpole, walker +from carl.envs.dmc.dmc_tasks import cartpole, walker, quadruped _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} From 3ff0cec0c3748022f7f48cd8e76444a0c46bb89b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 13 Jun 2022 16:56:06 +0200 Subject: [PATCH 17/37] add dmc fish --- carl/envs/dmc/__init__.py | 5 + carl/envs/dmc/carl_dm_fish.py | 96 +++++++++++++ carl/envs/dmc/carl_dm_quadruped.py | 1 + carl/envs/dmc/dmc_tasks/fish.py | 215 +++++++++++++++++++++++++++++ carl/envs/dmc/utils.py | 2 +- 5 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 carl/envs/dmc/carl_dm_fish.py create mode 100644 carl/envs/dmc/dmc_tasks/fish.py diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 3354fbee..8c010db1 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -13,3 +13,8 @@ DEFAULT_CONTEXT as CARLDmcQuadrupedEnv_defaults, ) from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv + +from carl.envs.dmc.carl_dm_fish import ( + DEFAULT_CONTEXT as CARLDmcFishEnv_defaults, +) +from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py new file mode 100644 index 00000000..b53c308e --- /dev/null +++ b/carl/envs/dmc/carl_dm_fish.py @@ -0,0 +1,96 @@ +from typing import Any, Dict, List, Optional, Union + +import numpy as np + +from carl.utils.trial_logger import TrialLogger +from carl.context.selection import AbstractSelector +from carl.envs.dmc.wrappers import MujocoToGymWrapper +from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv + + +DEFAULT_CONTEXT = { + # TODO update contexts + # "gravity_x": 0., # gravity disabled in this env + # "gravity_y": 0., + # "gravity_z": -9.81, + "friction_tangential": 0.7, + "friction_torsional": 0.1, + "friction_rolling": 0.1, + "actuator_strength": 1, # scale all actuators by this factor + "joint_damping": 0.00002, + # "torso_mass": 10, # TODO find out if mass can be modified + "timestep": 0.004, # Seconds between updates + "magnetic_x": 0., # TODO decide if this is useful + "magnetic_y": -0.5, + "magnetic_z": 0., + "wind_x": 0., # TODO decide if this is useful + "wind_y": 0., + "wind_z": 0., +} + +CONTEXT_BOUNDS = { + # "gravity_x": (-0.1, -np.inf, float), + # "gravity_y": (-0.1, -np.inf, float), 
+ # "gravity_z": (-0.1, -np.inf, float), + "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? + "friction_torsional": (-np.inf, np.inf, float), + "friction_rolling": (-np.inf, np.inf, float), + "actuator_strength": (-np.inf, np.inf, float), + "joint_damping": (0, np.inf, float), + # "torso_mass": (0.1, np.inf, float), + "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied + "magnetic_x": (-np.inf, np.inf, float), + "magnetic_y": (-np.inf, np.inf, float), + "magnetic_z": (-np.inf, np.inf, float), + "wind_x": (-np.inf, np.inf, float), + "wind_y": (-np.inf, np.inf, float), + "wind_z": (-np.inf, np.inf, float), +} + +class CARLDmcFishEnv(CARLDmcEnv): + def __init__( + self, + domain: str = "fish", + task: str = "upright_context", + contexts: Dict[Any, Dict[Any, Any]] = {}, + hide_context: bool = False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + state_context_features: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector_kwargs: Optional[Dict] = None, + ): + if not contexts: + contexts = {0: DEFAULT_CONTEXT} + self.domain = domain + self.task = task + if dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = MujocoToGymWrapper(env) + super().__init__( + env=env, + contexts=contexts, + hide_context=hide_context, + add_gaussian_noise_to_context=add_gaussian_noise_to_context, + gaussian_noise_std_percentage=gaussian_noise_std_percentage, + logger=logger, + scale_context_features=scale_context_features, + default_context=default_context, + max_episode_length=max_episode_length, + state_context_features=state_context_features, + dict_observation_space=dict_observation_space, + context_selector=context_selector, + context_selector_kwargs=context_selector_kwargs, + ) + # TODO check gaussian noise on context features + self.whitelist_gaussian_noise = list( + DEFAULT_CONTEXT.keys() + ) # allow to augment all values diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 72de2aff..96367e63 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -10,6 +10,7 @@ DEFAULT_CONTEXT = { + # TODO update contexts "joint_stiffness": 5000, "gravity_x": 0., "gravity_y": 0., diff --git a/carl/envs/dmc/dmc_tasks/fish.py b/carl/envs/dmc/dmc_tasks/fish.py new file mode 100644 index 00000000..b7475025 --- /dev/null +++ b/carl/envs/dmc/dmc_tasks/fish.py @@ -0,0 +1,215 @@ +# Copyright 2017 The dm_control Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Fish Domain.""" + +import collections + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import common +from dm_control.utils import containers +from dm_control.utils import rewards +import numpy as np +from lxml import etree + + +_DEFAULT_TIME_LIMIT = 40 +_CONTROL_TIMESTEP = .04 +_JOINTS = ['tail1', + 'tail_twist', + 'tail2', + 'finright_roll', + 'finright_pitch', + 'finleft_roll', + 'finleft_pitch'] +SUITE = containers.TaggedTasks() + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('fish.xml'), common.ASSETS + + +def adapt_context(xml_string, context): + """Adapts and returns the xml_string of the model with the given context.""" + mjcf = etree.fromstring(xml_string) + damping = mjcf.find("./default/default/joint") + damping.set("damping", str(context["joint_damping"])) +# friction = mjcf.find("./default/geom") +# friction.set("friction", " ".join([ +# str(context["friction_tangential"]), +# str(context["friction_torsional"]), +# str(context["friction_rolling"])]) +# ) +# actuators = mjcf.findall("./actuator/motor") +# for actuator in actuators: +# gear = actuator.get("gear") +# actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) + keys = [] + options = mjcf.findall("./option") + magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) + for option in options: + for k, v in option.items(): + keys.append(k) + if k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "magnetic": + option.set("magnetic", magnetic) + elif k == "wind": + option.set("wind", wind) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "magnetic" not in keys: + mjcf.append(etree.Element("option", magnetic=magnetic)) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=wind)) + xml_string = etree.tostring(mjcf, pretty_print=True) + return xml_string + + +@SUITE.add('benchmarking') +def upright_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns the Fish Upright task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, assets) + task = Upright(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +@SUITE.add('benchmarking') +def swim_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Fish Swim task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string, context) + physics = Physics.from_xml_string(xml_string, assets) + task = Swim(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Fish domain.""" + + def upright(self): + """Returns projection from z-axes of torso to the 
z-axes of worldbody.""" + return self.named.data.xmat['torso', 'zz'] + + def torso_velocity(self): + """Returns velocities and angular velocities of the torso.""" + return self.data.sensordata + + def joint_velocities(self): + """Returns the joint velocities.""" + return self.named.data.qvel[_JOINTS] + + def joint_angles(self): + """Returns the joint positions.""" + return self.named.data.qpos[_JOINTS] + + def mouth_to_target(self): + """Returns a vector, from mouth to target in local coordinate of mouth.""" + data = self.named.data + mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth'] + return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3)) + + +class Upright(base.Task): + """A Fish `Task` for getting the torso upright with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Upright`. + Args: + random: Either an existing `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically. + """ + super().__init__(random=random) + + def initialize_episode(self, physics): + """Randomizes the tail and fin angles and the orientation of the Fish.""" + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Hide the target. It's irrelevant for this task. + physics.named.model.geom_rgba['target', 3] = 0 + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joint angles, velocities and uprightness.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1) + + +class Swim(base.Task): + """A Fish `Task` for swimming with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Swim`. + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Randomize target position. 
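+ # Added note: `physics.named.model.geom_pos` indexes geoms by name, so the
+ # assignments below move the target to a fresh spot each episode (x and y
+ # uniform in [-0.4, 0.4], z in [0.1, 0.3]); unlike in `Upright`, the target
+ # is left visible here because reaching it is the swim objective.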
+ physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joints, target direction and velocities.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['target'] = physics.mouth_to_target() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() + in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), + bounds=(0, radii), margin=2*radii) + is_upright = 0.5 * (physics.upright() + 1) + return (7*in_target + is_upright) / 8 \ No newline at end of file diff --git a/carl/envs/dmc/utils.py b/carl/envs/dmc/utils.py index d3e7ae85..f5607ece 100644 --- a/carl/envs/dmc/utils.py +++ b/carl/envs/dmc/utils.py @@ -2,7 +2,7 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import cartpole, walker, quadruped +from carl.envs.dmc.dmc_tasks import cartpole, walker, quadruped, fish _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} From ea53c3c0b8b8b9926d635e740d97c1e6a9f7600a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 20 Jun 2022 11:18:03 +0200 Subject: [PATCH 18/37] walker context defaults, adapt context utils --- carl/envs/dmc/carl_dm_walker.py | 43 ++++++++++------------- carl/envs/dmc/dmc_tasks/cartpole.py | 10 ++---- carl/envs/dmc/dmc_tasks/utils.py | 54 +++++++++++++++++++++++++++++ carl/envs/dmc/dmc_tasks/walker.py | 45 +----------------------- test_dm_control.py | 29 ++++++++++++---- 5 files changed, 99 insertions(+), 82 deletions(-) create mode 100644 carl/envs/dmc/dmc_tasks/utils.py diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 1355ce42..ac9916dc 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -1,3 +1,4 @@ +from ast import JoinedStr from typing import Any, Dict, List, Optional, Union import numpy as np @@ -10,40 +11,34 @@ DEFAULT_CONTEXT = { - # "joint_stiffness": 5000, # TODO where to find this - "gravity_x": 0., - "gravity_y": 0., - "gravity_z": -9.81, + "gravity": -9.81, "friction_tangential": 0.7, "friction_torsional": 0.1, "friction_rolling": 0.1, - "actuator_strength": 1, # scale all actuators by this factor - "joint_damping": 0.1, - # "torso_mass": 10, # TODO find out if mass can be modified "timestep": 0.0025, # Seconds between updates - "magnetic_x": 0., # TODO decide if this is useful - "magnetic_y": -0.5, - "magnetic_z": 0., - "wind_x": 0., # TODO decide if this is useful + "joint_damping": 0.1, + "joint_stiffness": 0., + "actuator_strength": 1, # scaling factor for all actuators in the model + "density": 0., + "viscosity": 0., + "geom_density": 1, # scaling factor for all geom (objects) densities + "wind_x": 0., "wind_y": 0., "wind_z": 0., } CONTEXT_BOUNDS = { - # "joint_stiffness": (1, np.inf, float), - "gravity_x": (-0.1, -np.inf, float), - "gravity_y": (-0.1, -np.inf, float), - "gravity_z": (-0.1, -np.inf, float), - "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? 
- "friction_torsional": (-np.inf, np.inf, float), - "friction_rolling": (-np.inf, np.inf, float), - "actuator_strength": (-np.inf, np.inf, float), + "gravity": (-0.1, -np.inf, float), + "friction_tangential": (0, np.inf, float), + "friction_torsional": (0, np.inf, float), + "friction_rolling": (0, np.inf, float), + "timestep": (0.001, 0.1, float,), "joint_damping": (0, np.inf, float), - # "torso_mass": (0.1, np.inf, float), - "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied - "magnetic_x": (-np.inf, np.inf, float), - "magnetic_y": (-np.inf, np.inf, float), - "magnetic_z": (-np.inf, np.inf, float), + "joint_stiffness": (0, np.inf, float), + "actuator_strength": (0, np.inf, float), + "density": (0, np.inf, float), + "viscosity": (0, np.inf, float), + "geom_density": (0, np.inf, float), "wind_x": (-np.inf, np.inf, float), "wind_y": (-np.inf, np.inf, float), "wind_z": (-np.inf, np.inf, float), diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py index 834c43e4..8ff6bc4d 100644 --- a/carl/envs/dmc/dmc_tasks/cartpole.py +++ b/carl/envs/dmc/dmc_tasks/cartpole.py @@ -51,11 +51,11 @@ def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, cart = mjcf.find("./worldbody/body/geom") cart.set("mass", str(context["masscart"])) force = mjcf.find("./actuator/motor") - force.set("gear", str(context["force_magnifier"])) + gear = force.get("gear") + force.set("gear", str(int(float(gear) * context["actuator_strength"]))) keys = [] options = mjcf.findall("./option") - gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) - magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) + gravity = " ".join(["0", "0", str(context["gravity"])]) wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) for option in options: for k, v in option.items(): @@ -64,16 +64,12 @@ def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, option.set("gravity", gravity) elif k == "timestep": option.set("timestep", str(context["timestep"])) - elif k == "magnetic": - option.set("magnetic", magnetic) elif k == "wind": option.set("wind", wind) if "gravity" not in keys: mjcf.append(etree.Element("option", gravity=gravity)) if "timestep" not in keys: mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=magnetic)) if "wind" not in keys: mjcf.append(etree.Element("option", wind=wind)) xml_string = etree.tostring(mjcf, pretty_print=True) diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py new file mode 100644 index 00000000..2c75c97a --- /dev/null +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -0,0 +1,54 @@ +from lxml import etree + + +def adapt_context(xml_string, context): + """Adapts and returns the xml_string of the model with the given context.""" + print(context) + mjcf = etree.fromstring(xml_string) + damping = mjcf.find("./default/joint") + damping.set("damping", str(context["joint_damping"])) + geom = mjcf.find("./default/geom") + geom.set("friction", " ".join([ + str(context["friction_tangential"]), + str(context["friction_torsional"]), + str(context["friction_rolling"])]) + ) + geom_density = geom.get("density") + if not geom_density: + geom_density = 1000 + geom.set("density", str(geom_density * context["geom_density"])) + actuators = mjcf.findall("./actuator/motor") + for actuator 
in actuators: + gear = actuator.get("gear") + actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) + keys = [] + options = mjcf.findall("./option") + gravity = " ".join(["0", "0", str(context["gravity"])]) + wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) + for option in options: + for k, _ in option.items(): + keys.append(k) + if k == "gravity": + option.set("gravity", gravity) + elif k == "timestep": + option.set("timestep", str(context["timestep"])) + elif k == "density": + option.set("density", str(context["density"])) + elif k == "viscosity": + option.set("viscosity", str(context["viscosity"])) + elif k == "wind": + option.set("wind", wind) + + if "gravity" not in keys: + mjcf.append(etree.Element("option", gravity=gravity)) + if "timestep" not in keys: + mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) + if "wind" not in keys: + mjcf.append(etree.Element("option", wind=wind)) + if "density" not in keys: + mjcf.append(etree.Element("option", density=str(context["density"]))) + if "viscosity" not in keys: + mjcf.append(etree.Element("option", viscosity=str(context["viscosity"]))) + xml_string = etree.tostring(mjcf, pretty_print=True) + print(xml_string.decode("utf-8")) + return xml_string \ No newline at end of file diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index 0ab55bc9..205eb9b1 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -25,7 +25,7 @@ from dm_control.suite.utils import randomizers from dm_control.utils import containers from dm_control.utils import rewards -from lxml import etree +from carl.envs.dmc.dmc_tasks.utils import adapt_context _DEFAULT_TIME_LIMIT = 25 @@ -47,49 +47,6 @@ def get_model_and_assets(): return common.read_model('walker.xml'), common.ASSETS -def adapt_context(xml_string, context): - """Adapts and returns the xml_string of the model with the given context.""" - mjcf = etree.fromstring(xml_string) - damping = mjcf.find("./default/joint") - damping.set("damping", str(context["joint_damping"])) - friction = mjcf.find("./default/geom") - friction.set("friction", " ".join([ - str(context["friction_tangential"]), - str(context["friction_torsional"]), - str(context["friction_rolling"])]) - ) - actuators = mjcf.findall("./actuator/motor") - for actuator in actuators: - gear = actuator.get("gear") - actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") - gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) - magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, v in option.items(): - keys.append(k) - if k == "gravity": - option.set("gravity", gravity) - elif k == "timestep": - option.set("timestep", str(context["timestep"])) - elif k == "magnetic": - option.set("magnetic", magnetic) - elif k == "wind": - option.set("wind", wind) - if "gravity" not in keys: - mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=magnetic)) - if "wind" not in keys: - mjcf.append(etree.Element("option", wind=wind)) - xml_string = etree.tostring(mjcf, 
pretty_print=True) - return xml_string - - @SUITE.add('benchmarking') def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Stand task with the adapted context.""" diff --git a/test_dm_control.py b/test_dm_control.py index 565783f7..c2d27bde 100644 --- a/test_dm_control.py +++ b/test_dm_control.py @@ -1,21 +1,35 @@ +import imp from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv +from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv +from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv from carl.envs.classic_control import CARLCartPoleEnv from carl.envs import CARLDmcCartPoleEnv_defaults as cartpole_default from carl.envs import CARLDmcWalkerEnv_defaults as walker_default +from carl.envs import CARLDmcQuadrupedEnv_defaults as quadruped_default +from carl.envs import CARLDmcFishEnv_defaults as fish_default import matplotlib.pyplot as plt if __name__ == "__main__": # Load one task: - longer_pole = cartpole_default.copy() - longer_pole["pole_length"] = cartpole_default["pole_length"]*2 - contexts = {0: longer_pole} + # longer_pole = cartpole_default.copy() + # longer_pole["pole_length"] = cartpole_default["pole_length"]*2 + # contexts = {0: longer_pole} + # carl_env = CARLDmcCartPoleEnv(task="swingup_context", contexts=contexts, hide_context=False) + + stronger_act = walker_default.copy() + stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 + contexts = {0: stronger_act} + carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, hide_context=False) - walker_default["actuator_strength"] = walker_default["actuator_strength"]*2 - contexts = {0: walker_default} - # env = load_dmc_env("cartpole", "swingup") - carl_env = CARLDmcWalkerEnv(contexts=contexts, hide_context=False) + # stronger_act = walker_default.copy() + # stronger_act["actuator_strength"] = quadruped_default["actuator_strength"]*2 + # contexts = {0: stronger_act} + # carl_env = CARLDmcQuadrupedEnv(task="walk_context", contexts=contexts, hide_context=False) + + # contexts = {0: fish_default} + # carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, hide_context=False) render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) s = carl_env.reset() render() @@ -23,6 +37,7 @@ action = carl_env.action_space.sample() state, reward, done, info = carl_env.step(action=action) print("state", state, type(state)) + assert False s = carl_env.reset() done = False From af96a0b70dbb8607d3a6b9123736b28fdb0abe77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 20 Jun 2022 16:38:46 +0200 Subject: [PATCH 19/37] update adapt context function --- carl/envs/dmc/dmc_tasks/utils.py | 100 ++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 23 deletions(-) diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index 2c75c97a..bd7825bd 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -1,33 +1,88 @@ +from wsgiref.simple_server import demo_app from lxml import etree +from torch import ge def adapt_context(xml_string, context): """Adapts and returns the xml_string of the model with the given context.""" - print(context) mjcf = etree.fromstring(xml_string) - damping = mjcf.find("./default/joint") - damping.set("damping", str(context["joint_damping"])) + default = mjcf.find("./default/") + if default is None: + default = etree.Element("default") + mjcf.addnext(default) + 
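    # Overview of the rewritten helper (a summary comment, not a line of the
    # patch): the steps below treat most context entries as scaling factors on
    # top of what the model XML already defines. joint_damping, joint_stiffness,
    # friction_tangential/torsional/rolling, geom_density and actuator_strength
    # multiply the matching attribute values where they exist in the XML,
    # otherwise they are applied to the defaults hard-coded further down, while
    # gravity, timestep, density, viscosity and wind_x/y/z are written into the
    # <option> element as absolute values.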
+ # adjust damping for all joints if damping is already an attribute + for joint_find in mjcf.findall(".//joint[@damping]"): + joint_damping = joint_find.get("damping") + joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) + + # adjust stiffness for all joints if stiffness is already an attribute + for joint_find in mjcf.findall(".//joint[@stiffness]"): + joint_stiffness = joint_find.get("stiffness") + joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) + + # set default joint damping if default/joint is not present + joint = mjcf.find("./default/joint") + if joint is None: + joint = etree.Element("joint") + default.addnext(joint) + def_joint_damping = 0.1 + default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) + joint.set("damping", default_joint_damping) + default_joint_stiffness = str(context["joint_stiffness"]) + joint.set("stiffness", default_joint_stiffness) + + # adjust friction for all geom elements with friction attribute + for geom_find in mjcf.findall(".//geom[@friction]"): + friction = geom_find.get("friction").split(" ") + frict_str = "" + for f, d in zip(friction, [context["friction_tangential"]*2, context["friction_torsional"], context["friction_rolling"]]): + frict_str += str(float(f) * d) + " " + geom_find.set("friction", frict_str[:-1]) + + # adjust density for all geom elements with density attribute + for geom_find in mjcf.findall(".//geom[@density]"): + geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) + + # create default geom if it does not exist geom = mjcf.find("./default/geom") - geom.set("friction", " ".join([ - str(context["friction_tangential"]), - str(context["friction_torsional"]), - str(context["friction_rolling"])]) - ) + if geom is None: + geom = etree.Element("geom") + default.addnext(geom) + + # set default friction + if geom.get("friction") is None: + default_friction_tangential = 1. 
+ default_friction_torsional = 0.005 + default_friction_rolling = 0.0001 + geom.set("friction", " ".join([ + str(default_friction_tangential * context["friction_tangential"]), + str(default_friction_torsional * context["friction_torsional"]), + str(default_friction_rolling * context["friction_rolling"])]) + ) + + # set default density geom_density = geom.get("density") - if not geom_density: + if geom_density is None: geom_density = 1000 - geom.set("density", str(geom_density * context["geom_density"])) - actuators = mjcf.findall("./actuator/motor") + geom.set("density", str(float(geom_density) * context["geom_density"])) + + actuators = mjcf.findall("./actuator/") for actuator in actuators: gear = actuator.get("gear") - actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") + if gear is None: + gear = 1 + actuator.set("gear", str(float(gear) * context["actuator_strength"])) + + + # find option settings and override them if they exist, otherwise create new option + option_keys = [] + options = mjcf.findall(".//option") gravity = " ".join(["0", "0", str(context["gravity"])]) wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) for option in options: for k, _ in option.items(): - keys.append(k) + option_keys.append(k) if k == "gravity": option.set("gravity", gravity) elif k == "timestep": @@ -38,17 +93,16 @@ def adapt_context(xml_string, context): option.set("viscosity", str(context["viscosity"])) elif k == "wind": option.set("wind", wind) - - if "gravity" not in keys: + if "gravity" not in option_keys: mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in keys: + if "timestep" not in option_keys: mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "wind" not in keys: + if "wind" not in option_keys: mjcf.append(etree.Element("option", wind=wind)) - if "density" not in keys: + if "density" not in option_keys: mjcf.append(etree.Element("option", density=str(context["density"]))) - if "viscosity" not in keys: + if "viscosity" not in option_keys: mjcf.append(etree.Element("option", viscosity=str(context["viscosity"]))) + xml_string = etree.tostring(mjcf, pretty_print=True) - print(xml_string.decode("utf-8")) - return xml_string \ No newline at end of file + return xml_string From f711123a0e90bb5407cb4f87ed20538da5bdb7a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 20 Jun 2022 16:39:11 +0200 Subject: [PATCH 20/37] quadruped adapt context utils --- carl/envs/dmc/dmc_tasks/quadruped.py | 44 +--------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py index ee276dfe..264a9f6b 100644 --- a/carl/envs/dmc/dmc_tasks/quadruped.py +++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -25,6 +25,7 @@ from dm_control.utils import containers from dm_control.utils import rewards from dm_control.utils import xml_tools +from carl.envs.dmc.dmc_tasks.utils import adapt_context from lxml import etree import numpy as np from scipy import ndimage @@ -93,49 +94,6 @@ def make_model(floor_size=None, terrain=False, rangefinders=False, return etree.tostring(mjcf, pretty_print=True) -def adapt_context(xml_string, context): - """Adapts and returns the xml_string of the model with the given context.""" - mjcf = etree.fromstring(xml_string) -# damping = mjcf.find("./default/joint") -# damping.set("damping", str(context["joint_damping"])) -# 
friction = mjcf.find("./default/geom") -# friction.set("friction", " ".join([ -# str(context["friction_tangential"]), -# str(context["friction_torsional"]), -# str(context["friction_rolling"])]) -# ) -# actuators = mjcf.findall("./actuator/motor") -# for actuator in actuators: -# gear = actuator.get("gear") -# actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") - gravity = " ".join([str(context["gravity_x"]), str(context["gravity_y"]), str(context["gravity_z"])]) - magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, v in option.items(): - keys.append(k) - if k == "gravity": - option.set("gravity", gravity) - elif k == "timestep": - option.set("timestep", str(context["timestep"])) - elif k == "magnetic": - option.set("magnetic", magnetic) - elif k == "wind": - option.set("wind", wind) - if "gravity" not in keys: - mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=magnetic)) - if "wind" not in keys: - mjcf.append(etree.Element("option", wind=wind)) - xml_string = etree.tostring(mjcf, pretty_print=True) - return xml_string - - @SUITE.add() def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Walk task with the adapted context.""" From f18c7994703ad97f8b4024800d7c4bb8af7309b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Mon, 20 Jun 2022 16:39:43 +0200 Subject: [PATCH 21/37] update dmc context parameters --- carl/envs/dmc/carl_dm_cartpole.py | 22 ++++---------- carl/envs/dmc/carl_dm_quadruped.py | 49 +++++++++++++----------------- carl/envs/dmc/carl_dm_walker.py | 12 ++++---- 3 files changed, 33 insertions(+), 50 deletions(-) diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py index 80ab2e4e..bb8aed8e 100644 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -10,34 +10,24 @@ DEFAULT_CONTEXT = { - "gravity_x": 0., - "gravity_y": 0., - "gravity_z": -9.81, + "gravity": -9.81, "masscart": 1.0, # Should be seen as 100% and scaled accordingly "masspole": 0.1, # Should be seen as 100% and scaled accordingly "pole_length": 1.0, # Should be seen as 100% and scaled accordingly - "force_magnifier": 10.0, + "actuator_strength": 1.0, "timestep": 0.01, # Seconds between updates - "magnetic_x": 0., # TODO decide if this is useful - "magnetic_y": -0.5, - "magnetic_z": 0., - "wind_x": 0., # TODO decide if this is useful + "wind_x": 0., "wind_y": 0., "wind_z": 0., } CONTEXT_BOUNDS = { - "gravity_x": (-0.1, -np.inf, float), - "gravity_y": (-0.1, -np.inf, float), - "gravity_z": (-0.1, -np.inf, float), # Negative gravity + "gravity": (-0.1, -np.inf, float), # Negative gravity "masscart": (0.1, 10, float), # Cart mass can be varied by a factor of 10 "masspole": (0.01, 1, float), # Pole mass can be varied by a factor of 10 "pole_length": (0.05, 5, float), # Pole length can be varied by a factor of 10 - "force_magnifier": (1, 100, int), # Force magnifier can be varied by a factor of 10 - "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied - "magnetic_x": (-np.inf, np.inf, float), - "magnetic_y": (-np.inf, np.inf, 
float), - "magnetic_z": (-np.inf, np.inf, float), + "actuator_strength": (1, 100, int), # Force magnifier can be varied by a factor of 10 + "timestep": (0.001, 0.1, float,), "wind_x": (-np.inf, np.inf, float), "wind_y": (-np.inf, np.inf, float), "wind_z": (-np.inf, np.inf, float), diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 96367e63..4e200530 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -10,41 +10,34 @@ DEFAULT_CONTEXT = { - # TODO update contexts - "joint_stiffness": 5000, - "gravity_x": 0., - "gravity_y": 0., - "gravity_z": -9.81, - "friction_tangential": 0.7, - "friction_torsional": 0.1, - "friction_rolling": 0.1, - "actuator_strength": 1, # scale all actuators by this factor - "joint_damping": 0.1, - # "torso_mass": 10, # TODO find out if mass can be modified + "gravity": -9.81, + "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.005, # Seconds between updates - "magnetic_x": 0., # TODO decide if this is useful - "magnetic_y": -0.5, - "magnetic_z": 0., - "wind_x": 0., # TODO decide if this is useful + "joint_damping": 1., # Scaling factor for all joints + "joint_stiffness": 0., + "actuator_strength": 1, # Scaling factor for all actuators in the model + "density": 0., + "viscosity": 0., + "geom_density": 1., # Scaling factor for all geom (objects) densities + "wind_x": 0., "wind_y": 0., "wind_z": 0., } CONTEXT_BOUNDS = { - "joint_stiffness": (1, np.inf, float), - "gravity_x": (-0.1, -np.inf, float), - "gravity_y": (-0.1, -np.inf, float), - "gravity_z": (-0.1, -np.inf, float), - "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? 
- "friction_torsional": (-np.inf, np.inf, float), - "friction_rolling": (-np.inf, np.inf, float), - "actuator_strength": (-np.inf, np.inf, float), + "gravity": (-0.1, -np.inf, float), + "friction_tangential": (0, np.inf, float), + "friction_torsional": (0, np.inf, float), + "friction_rolling": (0, np.inf, float), + "timestep": (0.001, 0.1, float,), "joint_damping": (0, np.inf, float), - # "torso_mass": (0.1, np.inf, float), - "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied - "magnetic_x": (-np.inf, np.inf, float), - "magnetic_y": (-np.inf, np.inf, float), - "magnetic_z": (-np.inf, np.inf, float), + "joint_stiffness": (0, np.inf, float), + "actuator_strength": (0, np.inf, float), + "density": (0, np.inf, float), + "viscosity": (0, np.inf, float), + "geom_density": (0, np.inf, float), "wind_x": (-np.inf, np.inf, float), "wind_y": (-np.inf, np.inf, float), "wind_z": (-np.inf, np.inf, float), diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index ac9916dc..639ccc3b 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -12,16 +12,16 @@ DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 0.7, - "friction_torsional": 0.1, - "friction_rolling": 0.1, + "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.0025, # Seconds between updates - "joint_damping": 0.1, + "joint_damping": 1., # Scaling factor for all joints "joint_stiffness": 0., - "actuator_strength": 1, # scaling factor for all actuators in the model + "actuator_strength": 1., # Scaling factor for all actuators in the model "density": 0., "viscosity": 0., - "geom_density": 1, # scaling factor for all geom (objects) densities + "geom_density": 1., # Scaling factor for all geom (objects) densities "wind_x": 0., "wind_y": 0., "wind_z": 0., From 12f6ea64acb9db6d4dc6b7b92f65460d90500928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 21 Jun 2022 09:08:53 +0200 Subject: [PATCH 22/37] rename utils->loader --- carl/envs/dmc/carl_dm_cartpole.py | 2 +- carl/envs/dmc/carl_dm_quadruped.py | 2 +- carl/envs/dmc/carl_dm_walker.py | 2 +- carl/envs/dmc/carl_dmcontrol.py | 6 ++++-- carl/envs/dmc/{utils.py => loader.py} | 4 ++-- 5 files changed, 9 insertions(+), 7 deletions(-) rename carl/envs/dmc/{utils.py => loader.py} (86%) diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py index bb8aed8e..2d4e3915 100644 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ b/carl/envs/dmc/carl_dm_cartpole.py @@ -5,7 +5,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 4e200530..0e241db2 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -5,7 +5,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.loader import load_dmc_env from 
carl.envs.dmc.carl_dmcontrol import CARLDmcEnv diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 639ccc3b..75d13667 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -6,7 +6,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index 76a1f974..47e35947 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -11,7 +11,7 @@ from carl.envs.carl_env import CARLEnv from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.loader import load_dmc_env from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector @@ -137,6 +137,7 @@ def __init__( self, env: gym.Env, contexts: Dict[Any, Dict[Any, Any]], + context_mask: Optional[List[str]], hide_context: bool, add_gaussian_noise_to_context: bool, gaussian_noise_std_percentage: float, @@ -151,6 +152,7 @@ def __init__( ): # TODO can we have more than 1 env? # env = MujocoToGymWrapper(env) + self.context_mask = context_mask super().__init__( env=env, contexts=contexts, @@ -171,5 +173,5 @@ def _update_context(self) -> None: if self.dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, environment_kwargs={"flat_observation": True}) + env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, context_mask=self.context_mask, environment_kwargs={"flat_observation": True}) self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/utils.py b/carl/envs/dmc/loader.py similarity index 86% rename from carl/envs/dmc/utils.py rename to carl/envs/dmc/loader.py index f5607ece..5482a747 100644 --- a/carl/envs/dmc/utils.py +++ b/carl/envs/dmc/loader.py @@ -7,7 +7,7 @@ _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} -def load_dmc_env(domain_name, task_name, context={}, task_kwargs=None, environment_kwargs=None, +def load_dmc_env(domain_name, task_name, context={}, context_mask=[], task_kwargs=None, environment_kwargs=None, visualize_reward=False): if domain_name in _DOMAINS: @@ -33,5 +33,5 @@ def load_dmc_env(domain_name, task_name, context={}, task_kwargs=None, environme visualize_reward=visualize_reward, ) else: - raise ValueError('Level {!r} does not exist in domain {!r}.'.format( + raise ValueError('Task {!r} does not exist in domain {!r}.'.format( task_name, domain_name)) From f148fb97df61def787eeeb561f2c3cd09eede315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 21 Jun 2022 10:54:34 +0200 Subject: [PATCH 23/37] add masking to dmc envs --- carl/envs/dmc/__init__.py | 11 +- carl/envs/dmc/carl_dm_fish.py | 62 ++++++----- carl/envs/dmc/carl_dm_quadruped.py | 17 ++- carl/envs/dmc/carl_dm_walker.py | 10 +- carl/envs/dmc/dmc_tasks/fish.py | 48 +-------- carl/envs/dmc/dmc_tasks/quadruped.py | 16 +-- carl/envs/dmc/dmc_tasks/utils.py | 148 +++++++++++++++------------ carl/envs/dmc/dmc_tasks/walker.py | 12 +-- carl/envs/dmc/loader.py | 2 +- test_dm_control.py | 30 +++--- 10 files changed, 184 insertions(+), 172 deletions(-) diff --git 
a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 8c010db1..9aa699fd 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -1,20 +1,23 @@ # Contexts and bounds by name from carl.envs.dmc.carl_dm_cartpole import ( + CARLDmcCartPoleEnv, DEFAULT_CONTEXT as CARLDmcCartPoleEnv_defaults, ) -from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv from carl.envs.dmc.carl_dm_walker import ( + CARLDmcWalkerEnv, DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults, + CONTEXT_MASK as CARLDmcWalkerEnv_mask, ) -from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv from carl.envs.dmc.carl_dm_quadruped import ( + CARLDmcQuadrupedEnv, DEFAULT_CONTEXT as CARLDmcQuadrupedEnv_defaults, + CONTEXT_MASK as CARLDmcQuadrupedEnv_mask, ) -from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv from carl.envs.dmc.carl_dm_fish import ( + CARLDmcFishEnv, DEFAULT_CONTEXT as CARLDmcFishEnv_defaults, + CONTEXT_MASK as CARLDmcFishEnv_mask, ) -from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index b53c308e..2960aa17 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -5,55 +5,60 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.utils import load_dmc_env +from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv DEFAULT_CONTEXT = { - # TODO update contexts - # "gravity_x": 0., # gravity disabled in this env - # "gravity_y": 0., - # "gravity_z": -9.81, - "friction_tangential": 0.7, - "friction_torsional": 0.1, - "friction_rolling": 0.1, - "actuator_strength": 1, # scale all actuators by this factor - "joint_damping": 0.00002, - # "torso_mass": 10, # TODO find out if mass can be modified + "gravity": -9.81, # Gravity is disabled via flag + "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.004, # Seconds between updates - "magnetic_x": 0., # TODO decide if this is useful - "magnetic_y": -0.5, - "magnetic_z": 0., - "wind_x": 0., # TODO decide if this is useful + "joint_damping": 1., # Scaling factor for all joints + "joint_stiffness": 0., + "actuator_strength": 1, # Scaling factor for all actuators in the model + "density": 6000., + "viscosity": 0., + "geom_density": 1., # No effect, because no gravity + "wind_x": 0., "wind_y": 0., "wind_z": 0., } CONTEXT_BOUNDS = { - # "gravity_x": (-0.1, -np.inf, float), - # "gravity_y": (-0.1, -np.inf, float), - # "gravity_z": (-0.1, -np.inf, float), - "friction_tangential": (-np.inf, np.inf, float), # TODO can friction be negative here? 
- "friction_torsional": (-np.inf, np.inf, float), - "friction_rolling": (-np.inf, np.inf, float), - "actuator_strength": (-np.inf, np.inf, float), + "gravity": (-0.1, -np.inf, float), + "friction_tangential": (0, np.inf, float), + "friction_torsional": (0, np.inf, float), + "friction_rolling": (0, np.inf, float), + "timestep": (0.001, 0.1, float,), "joint_damping": (0, np.inf, float), - # "torso_mass": (0.1, np.inf, float), - "timestep": (0.001, 0.1, float,), # TODO not sure how much it should be varied - "magnetic_x": (-np.inf, np.inf, float), - "magnetic_y": (-np.inf, np.inf, float), - "magnetic_z": (-np.inf, np.inf, float), + "joint_stiffness": (0, np.inf, float), + "actuator_strength": (0, np.inf, float), + "density": (0, np.inf, float), + "viscosity": (0, np.inf, float), + "geom_density": (0, np.inf, float), "wind_x": (-np.inf, np.inf, float), "wind_y": (-np.inf, np.inf, float), "wind_z": (-np.inf, np.inf, float), } +CONTEXT_MASK = [ + "gravity", + "geom_density", + "wind_x", + "wind_y", + "wind_z", +] + + class CARLDmcFishEnv(CARLDmcEnv): def __init__( self, domain: str = "fish", task: str = "upright_context", contexts: Dict[Any, Dict[Any, Any]] = {}, + context_mask: Optional[List[str]] = [], hide_context: bool = False, add_gaussian_noise_to_context: bool = False, gaussian_noise_std_percentage: float = 0.01, @@ -73,11 +78,12 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = load_dmc_env(domain_name=domain, task_name=task, context={}, context_mask=[], environment_kwargs={"flat_observation": True}) env = MujocoToGymWrapper(env) super().__init__( env=env, contexts=contexts, + context_mask=context_mask, hide_context=hide_context, add_gaussian_noise_to_context=add_gaussian_noise_to_context, gaussian_noise_std_percentage=gaussian_noise_std_percentage, diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 0e241db2..6cdd619a 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -11,9 +11,9 @@ DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) + "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.005, # Seconds between updates "joint_damping": 1., # Scaling factor for all joints "joint_stiffness": 0., @@ -43,12 +43,20 @@ "wind_z": (-np.inf, np.inf, float), } +CONTEXT_MASK = [ + "wind_x", + "wind_y", + "wind_z", +] + + class CARLDmcQuadrupedEnv(CARLDmcEnv): def __init__( self, domain: str = "quadruped", task: str = "walk_context", contexts: Dict[Any, Dict[Any, Any]] = {}, + context_mask: Optional[List[str]] = [], hide_context: bool = False, add_gaussian_noise_to_context: bool = False, gaussian_noise_std_percentage: float = 0.01, @@ -68,11 +76,12 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = load_dmc_env(domain_name=domain, task_name=task, 
context={}, context_mask=[], environment_kwargs={"flat_observation": True}) env = MujocoToGymWrapper(env) super().__init__( env=env, contexts=contexts, + context_mask=context_mask, hide_context=hide_context, add_gaussian_noise_to_context=add_gaussian_noise_to_context, gaussian_noise_std_percentage=gaussian_noise_std_percentage, diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 75d13667..bd7320e9 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -44,12 +44,19 @@ "wind_z": (-np.inf, np.inf, float), } +CONTEXT_MASK = [ + "wind_x", + "wind_y", + "wind_z", +] + class CARLDmcWalkerEnv(CARLDmcEnv): def __init__( self, domain: str = "walker", task: str = "stand_context", contexts: Dict[Any, Dict[Any, Any]] = {}, + context_mask: Optional[List[str]] = [], hide_context: bool = False, add_gaussian_noise_to_context: bool = False, gaussian_noise_std_percentage: float = 0.01, @@ -69,11 +76,12 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) + env = load_dmc_env(domain_name=domain, task_name=task, context={}, context_mask=[], environment_kwargs={"flat_observation": True}) env = MujocoToGymWrapper(env) super().__init__( env=env, contexts=contexts, + context_mask=context_mask, hide_context=hide_context, add_gaussian_noise_to_context=add_gaussian_noise_to_context, gaussian_noise_std_percentage=gaussian_noise_std_percentage, diff --git a/carl/envs/dmc/dmc_tasks/fish.py b/carl/envs/dmc/dmc_tasks/fish.py index b7475025..e1bd2060 100644 --- a/carl/envs/dmc/dmc_tasks/fish.py +++ b/carl/envs/dmc/dmc_tasks/fish.py @@ -24,7 +24,7 @@ from dm_control.utils import containers from dm_control.utils import rewards import numpy as np -from lxml import etree +from carl.envs.dmc.dmc_tasks.utils import adapt_context _DEFAULT_TIME_LIMIT = 40 @@ -44,51 +44,13 @@ def get_model_and_assets(): return common.read_model('fish.xml'), common.ASSETS -def adapt_context(xml_string, context): - """Adapts and returns the xml_string of the model with the given context.""" - mjcf = etree.fromstring(xml_string) - damping = mjcf.find("./default/default/joint") - damping.set("damping", str(context["joint_damping"])) -# friction = mjcf.find("./default/geom") -# friction.set("friction", " ".join([ -# str(context["friction_tangential"]), -# str(context["friction_torsional"]), -# str(context["friction_rolling"])]) -# ) -# actuators = mjcf.findall("./actuator/motor") -# for actuator in actuators: -# gear = actuator.get("gear") -# actuator.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") - magnetic = " ".join([str(context["magnetic_x"]), str(context["magnetic_y"]), str(context["magnetic_z"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, v in option.items(): - keys.append(k) - if k == "timestep": - option.set("timestep", str(context["timestep"])) - elif k == "magnetic": - option.set("magnetic", magnetic) - elif k == "wind": - option.set("wind", wind) - if "timestep" not in keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "magnetic" not in keys: - mjcf.append(etree.Element("option", magnetic=magnetic)) - if "wind" not in keys: - mjcf.append(etree.Element("option", wind=wind)) - xml_string = etree.tostring(mjcf, pretty_print=True) - return xml_string - - 
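# Illustrative usage sketch (not an added line of this patch; the concrete
# context values are invented for the example): the per-domain adapt_context
# removed above is replaced by the shared helper in
# carl/envs/dmc/dmc_tasks/utils.py, which the task constructors below now call
# with an optional context_mask. Masked entries are skipped, so they keep the
# values already present in the model XML.
#
#     from carl.envs import CARLDmcFishEnv_defaults
#     from carl.envs.dmc.dmc_tasks.utils import adapt_context
#
#     xml_string, assets = get_model_and_assets()
#     context = {**CARLDmcFishEnv_defaults, "actuator_strength": 2.0}
#     xml_string = adapt_context(
#         xml_string=xml_string, context=context,
#         context_mask=["wind_x", "wind_y", "wind_z"],
#     )
#     physics = Physics.from_xml_string(xml_string, assets)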
@SUITE.add('benchmarking') -def upright_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, +def upright_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Fish Upright task.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, assets) task = Upright(random=random) environment_kwargs = environment_kwargs or {} @@ -98,11 +60,11 @@ def upright_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, @SUITE.add('benchmarking') -def swim_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def swim_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Fish Swim task.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, assets) task = Swim(random=random) environment_kwargs = environment_kwargs or {} diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py index 264a9f6b..9f327ac6 100644 --- a/carl/envs/dmc/dmc_tasks/quadruped.py +++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -95,11 +95,11 @@ def make_model(floor_size=None, terrain=False, rangefinders=False, @SUITE.add() -def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Walk task with the adapted context.""" xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Move(desired_speed=_WALK_SPEED, random=random) environment_kwargs = environment_kwargs or {} @@ -109,11 +109,11 @@ def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, enviro @SUITE.add() -def run_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Run task with the adapted context.""" xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Move(desired_speed=_RUN_SPEED, random=random) environment_kwargs = environment_kwargs or {} @@ -123,12 +123,12 @@ def run_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environ @SUITE.add() -def escape_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, +def escape_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Escape task with the adapted context.""" xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) if context != {}: - xml_string = 
adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Escape(random=random) environment_kwargs = environment_kwargs or {} @@ -138,11 +138,11 @@ def escape_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, @SUITE.add() -def fetch_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def fetch_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Fetch task with the adapted context.""" xml_string = make_model(walls_and_ball=True) if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Fetch(random=random) environment_kwargs = environment_kwargs or {} diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index bd7825bd..1efaf301 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -1,9 +1,7 @@ -from wsgiref.simple_server import demo_app from lxml import etree -from torch import ge -def adapt_context(xml_string, context): +def adapt_context(xml_string, context, context_mask=[]): """Adapts and returns the xml_string of the model with the given context.""" mjcf = etree.fromstring(xml_string) default = mjcf.find("./default/") @@ -11,38 +9,46 @@ def adapt_context(xml_string, context): default = etree.Element("default") mjcf.addnext(default) - # adjust damping for all joints if damping is already an attribute - for joint_find in mjcf.findall(".//joint[@damping]"): - joint_damping = joint_find.get("damping") - joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) + if "joint_daming" not in context_mask: + # adjust damping for all joints if damping is already an attribute + for joint_find in mjcf.findall(".//joint[@damping]"): + joint_damping = joint_find.get("damping") + joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) - # adjust stiffness for all joints if stiffness is already an attribute - for joint_find in mjcf.findall(".//joint[@stiffness]"): - joint_stiffness = joint_find.get("stiffness") - joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) + if "joint_stiffness" not in context_mask: + # adjust stiffness for all joints if stiffness is already an attribute + for joint_find in mjcf.findall(".//joint[@stiffness]"): + joint_stiffness = joint_find.get("stiffness") + joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) # set default joint damping if default/joint is not present joint = mjcf.find("./default/joint") if joint is None: joint = etree.Element("joint") default.addnext(joint) - def_joint_damping = 0.1 - default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) - joint.set("damping", default_joint_damping) - default_joint_stiffness = str(context["joint_stiffness"]) - joint.set("stiffness", default_joint_stiffness) + if "joint_daming" not in context_mask: + def_joint_damping = 0.1 + default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) + joint.set("damping", default_joint_damping) + if "joint_stiffness" not in context_mask: + default_joint_stiffness = str(context["joint_stiffness"]) + joint.set("stiffness", 
default_joint_stiffness) # adjust friction for all geom elements with friction attribute for geom_find in mjcf.findall(".//geom[@friction]"): friction = geom_find.get("friction").split(" ") frict_str = "" - for f, d in zip(friction, [context["friction_tangential"]*2, context["friction_torsional"], context["friction_rolling"]]): - frict_str += str(float(f) * d) + " " + for i, (f, d) in enumerate(zip(friction, [context["friction_tangential"], context["friction_torsional"], context["friction_rolling"]])): + if (i == 0 and "friction_tangential" not in context_mask) or (i == 1 and "friction_torsional" not in context_mask) or (i == 2 and "friction_rolling" not in context_mask): + frict_str += str(float(f) * d) + " " + else: + frict_str += str(f) + " " geom_find.set("friction", frict_str[:-1]) - # adjust density for all geom elements with density attribute - for geom_find in mjcf.findall(".//geom[@density]"): - geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) + if "geom_density" not in context_mask: + # adjust density for all geom elements with density attribute + for geom_find in mjcf.findall(".//geom[@density]"): + geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) # create default geom if it does not exist geom = mjcf.find("./default/geom") @@ -56,53 +62,67 @@ def adapt_context(xml_string, context): default_friction_torsional = 0.005 default_friction_rolling = 0.0001 geom.set("friction", " ".join([ - str(default_friction_tangential * context["friction_tangential"]), - str(default_friction_torsional * context["friction_torsional"]), - str(default_friction_rolling * context["friction_rolling"])]) - ) + (str(default_friction_tangential * context["friction_tangential"]) if "friction_tangential" not in context_mask else str(default_friction_tangential)), + (str(default_friction_torsional * context["friction_torsional"]) if "friction_torsional" not in context_mask else str(default_friction_torsional)), + (str(default_friction_rolling * context["friction_rolling"]) if "friction_rolling" not in context_mask else str(default_friction_rolling)), + ])) - # set default density - geom_density = geom.get("density") - if geom_density is None: - geom_density = 1000 - geom.set("density", str(float(geom_density) * context["geom_density"])) - - actuators = mjcf.findall("./actuator/") - for actuator in actuators: - gear = actuator.get("gear") - if gear is None: - gear = 1 - actuator.set("gear", str(float(gear) * context["actuator_strength"])) + if "geom_density" not in context_mask: + # set default density + geom_density = geom.get("density") + if geom_density is None: + geom_density = 1000 + geom.set("density", str(float(geom_density) * context["geom_density"])) + if "actuator_strength" not in context_mask: + # scale all actuators with the actuator strength factor + actuators = mjcf.findall("./actuator/") + for actuator in actuators: + gear = actuator.get("gear") + if gear is None: + gear = 1 + actuator.set("gear", str(float(gear) * context["actuator_strength"])) # find option settings and override them if they exist, otherwise create new option - option_keys = [] - options = mjcf.findall(".//option") - gravity = " ".join(["0", "0", str(context["gravity"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, _ in option.items(): - option_keys.append(k) - if k == "gravity": - option.set("gravity", gravity) - elif k == "timestep": - option.set("timestep", 
str(context["timestep"])) - elif k == "density": - option.set("density", str(context["density"])) - elif k == "viscosity": - option.set("viscosity", str(context["viscosity"])) - elif k == "wind": - option.set("wind", wind) - if "gravity" not in option_keys: - mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in option_keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "wind" not in option_keys: - mjcf.append(etree.Element("option", wind=wind)) - if "density" not in option_keys: - mjcf.append(etree.Element("option", density=str(context["density"]))) - if "viscosity" not in option_keys: - mjcf.append(etree.Element("option", viscosity=str(context["viscosity"]))) + option = mjcf.find(".//option") + if option is None: + option = etree.Element("option") + mjcf.append(option) + + if "gravity" not in context_mask: + gravity = option.get("gravity") + if gravity is not None: + g = gravity.split(" ") + gravity = " ".join([g[0], g[1], str(context["gravity"])]) + else: + gravity = " ".join(["0", "0", str(context["gravity"])]) + option.set("gravity", gravity) + + if "wind" not in context_mask: + wind = option.get("wind") + if wind is not None: + w = wind.split(" ") + wind = " ".join([ + (str(context["wind_x"]) if "wind_x" not in context_mask else w[0]), + (str(context["wind_y"]) if "wind_y" not in context_mask else w[1]), + (str(context["wind_z"]) if "wind_z" not in context_mask else w[2]), + ]) + else: + wind = " ".join([ + (str(context["wind_x"]) if "wind_x" not in context_mask else "0"), + (str(context["wind_y"]) if "wind_y" not in context_mask else "0"), + (str(context["wind_z"]) if "wind_z" not in context_mask else "0"), + ]) + option.set("wind", wind) + + if "timestep" not in context_mask: + option.set("timestep", str(context["timestep"])) + + if "density" not in context_mask: + option.set("density", str(context["density"])) + + if "viscosity" not in context_mask: + option.set("viscosity", str(context["viscosity"])) xml_string = etree.tostring(mjcf, pretty_print=True) return xml_string diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index 205eb9b1..ba162f89 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -48,11 +48,11 @@ def get_model_and_assets(): @SUITE.add('benchmarking') -def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def stand_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Stand task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=0, random=random) environment_kwargs = environment_kwargs or {} @@ -62,11 +62,11 @@ def stand_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, envir @SUITE.add('benchmarking') -def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Walk task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, 
context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=_WALK_SPEED, random=random) environment_kwargs = environment_kwargs or {} @@ -76,11 +76,11 @@ def walk_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, enviro @SUITE.add('benchmarking') -def run_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): """Returns the Run task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string, context) + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=_RUN_SPEED, random=random) environment_kwargs = environment_kwargs or {} diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 5482a747..2988fe4a 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -21,7 +21,7 @@ def load_dmc_env(domain_name, task_name, context={}, context_mask=[], task_kwarg task_kwargs = task_kwargs or {} if environment_kwargs is not None: task_kwargs = dict(task_kwargs, environment_kwargs=environment_kwargs) - env = domain.SUITE[task_name](context=context, **task_kwargs) + env = domain.SUITE[task_name](context=context, context_mask=context_mask, **task_kwargs) env.task.visualize_reward = visualize_reward return env elif (domain_name, task_name) in suite.ALL_TASKS: diff --git a/test_dm_control.py b/test_dm_control.py index c2d27bde..2534f7ad 100644 --- a/test_dm_control.py +++ b/test_dm_control.py @@ -1,13 +1,16 @@ import imp -from carl.envs.dmc.carl_dm_cartpole import CARLDmcCartPoleEnv -from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv -from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv -from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv -from carl.envs.classic_control import CARLCartPoleEnv +from carl.envs import CARLDmcCartPoleEnv +from carl.envs import CARLDmcWalkerEnv +from carl.envs import CARLDmcQuadrupedEnv +from carl.envs import CARLDmcFishEnv from carl.envs import CARLDmcCartPoleEnv_defaults as cartpole_default from carl.envs import CARLDmcWalkerEnv_defaults as walker_default from carl.envs import CARLDmcQuadrupedEnv_defaults as quadruped_default from carl.envs import CARLDmcFishEnv_defaults as fish_default +#from carl.envs import CARLDmcCartPoleEnv_mask as cartpole_default +from carl.envs import CARLDmcWalkerEnv_mask as walker_mask +from carl.envs import CARLDmcQuadrupedEnv_mask as quadruped_mask +from carl.envs import CARLDmcFishEnv_mask as fish_mask import matplotlib.pyplot as plt if __name__ == "__main__": @@ -18,18 +21,19 @@ # contexts = {0: longer_pole} # carl_env = CARLDmcCartPoleEnv(task="swingup_context", contexts=contexts, hide_context=False) - stronger_act = walker_default.copy() - stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 - contexts = {0: stronger_act} - carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, hide_context=False) - # stronger_act = walker_default.copy() + # stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 + # contexts = {0: stronger_act} + # carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False) + + # stronger_act = quadruped_default.copy() # 
stronger_act["actuator_strength"] = quadruped_default["actuator_strength"]*2 # contexts = {0: stronger_act} - # carl_env = CARLDmcQuadrupedEnv(task="walk_context", contexts=contexts, hide_context=False) + # carl_env = CARLDmcQuadrupedEnv(task="walk_context", contexts=contexts, context_mask=quadruped_mask, hide_context=False) + + contexts = {0: fish_default} + carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, context_mask=fish_mask, hide_context=False) - # contexts = {0: fish_default} - # carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, hide_context=False) render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) s = carl_env.reset() render() From 0073f298bab164643b00b626c09251c18898eb29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 21 Jun 2022 11:04:41 +0200 Subject: [PATCH 24/37] remove cartpole env --- carl/envs/dmc/__init__.py | 4 - carl/envs/dmc/carl_dm_cartpole.py | 82 ------------ carl/envs/dmc/dmc_tasks/cartpole.py | 200 ---------------------------- carl/envs/dmc/loader.py | 2 +- carl/envs/dmc/tasks.py | 47 ------- test_dm_control.py | 20 +-- 6 files changed, 7 insertions(+), 348 deletions(-) delete mode 100644 carl/envs/dmc/carl_dm_cartpole.py delete mode 100644 carl/envs/dmc/dmc_tasks/cartpole.py delete mode 100644 carl/envs/dmc/tasks.py diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 9aa699fd..65b477c9 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -1,8 +1,4 @@ # Contexts and bounds by name -from carl.envs.dmc.carl_dm_cartpole import ( - CARLDmcCartPoleEnv, - DEFAULT_CONTEXT as CARLDmcCartPoleEnv_defaults, -) from carl.envs.dmc.carl_dm_walker import ( CARLDmcWalkerEnv, diff --git a/carl/envs/dmc/carl_dm_cartpole.py b/carl/envs/dmc/carl_dm_cartpole.py deleted file mode 100644 index 2d4e3915..00000000 --- a/carl/envs/dmc/carl_dm_cartpole.py +++ /dev/null @@ -1,82 +0,0 @@ -from typing import Any, Dict, List, Optional, Union - -import numpy as np - -from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector -from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.loader import load_dmc_env -from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv - - -DEFAULT_CONTEXT = { - "gravity": -9.81, - "masscart": 1.0, # Should be seen as 100% and scaled accordingly - "masspole": 0.1, # Should be seen as 100% and scaled accordingly - "pole_length": 1.0, # Should be seen as 100% and scaled accordingly - "actuator_strength": 1.0, - "timestep": 0.01, # Seconds between updates - "wind_x": 0., - "wind_y": 0., - "wind_z": 0., -} - -CONTEXT_BOUNDS = { - "gravity": (-0.1, -np.inf, float), # Negative gravity - "masscart": (0.1, 10, float), # Cart mass can be varied by a factor of 10 - "masspole": (0.01, 1, float), # Pole mass can be varied by a factor of 10 - "pole_length": (0.05, 5, float), # Pole length can be varied by a factor of 10 - "actuator_strength": (1, 100, int), # Force magnifier can be varied by a factor of 10 - "timestep": (0.001, 0.1, float,), - "wind_x": (-np.inf, np.inf, float), - "wind_y": (-np.inf, np.inf, float), - "wind_z": (-np.inf, np.inf, float), -} - - -class CARLDmcCartPoleEnv(CARLDmcEnv): - def __init__( - self, - domain: str = "cartpole", - task: str = "swingup_context", - contexts: Dict[Any, Dict[Any, Any]] = {}, - hide_context: bool = False, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: 
str = "no", - default_context: Optional[Dict] = DEFAULT_CONTEXT, - max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py - state_context_features: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, - context_selector_kwargs: Optional[Dict] = None, - ): - if not contexts: - contexts = {0: DEFAULT_CONTEXT} - self.domain = domain - self.task = task - if dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, environment_kwargs={"flat_observation": True}) - env = MujocoToGymWrapper(env) - super().__init__( - env=env, - contexts=contexts, - hide_context=hide_context, - add_gaussian_noise_to_context=add_gaussian_noise_to_context, - gaussian_noise_std_percentage=gaussian_noise_std_percentage, - logger=logger, - scale_context_features=scale_context_features, - default_context=default_context, - max_episode_length=max_episode_length, - state_context_features=state_context_features, - dict_observation_space=dict_observation_space, - context_selector=context_selector, - context_selector_kwargs=context_selector_kwargs, - ) - # TODO check gaussian noise on context features - self.whitelist_gaussian_noise = list( - DEFAULT_CONTEXT.keys() - ) # allow to augment all values diff --git a/carl/envs/dmc/dmc_tasks/cartpole.py b/carl/envs/dmc/dmc_tasks/cartpole.py deleted file mode 100644 index 8ff6bc4d..00000000 --- a/carl/envs/dmc/dmc_tasks/cartpole.py +++ /dev/null @@ -1,200 +0,0 @@ - -# Copyright 2017 The dm_control Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -"""Cartpole domain.""" - -import collections -from unittest.mock import DEFAULT -from urllib.error import ContentTooShortError - -from dm_control import mujoco -from dm_control.rl import control -from dm_control.suite import base -from dm_control.suite import common -from dm_control.utils import containers -from dm_control.utils import rewards -from lxml import etree -import numpy as np - - -_DEFAULT_TIME_LIMIT = 10 -SUITE = containers.TaggedTasks() - - -def get_model_and_assets(num_poles=1): - """Returns a tuple containing the model XML string and a dict of assets.""" - return _make_model(num_poles), common.ASSETS - -@SUITE.add('benchmarking') -def swingup_context(context={}, time_limit=_DEFAULT_TIME_LIMIT, random=None, - environment_kwargs=None): - """Returns the Cartpole Swing-Up task.""" - xml_string, assets = get_model_and_assets() - if context != {}: - mjcf = etree.fromstring(xml_string) - pole = mjcf.find("./default/default/geom") - pole.set("mass", str(context["masspole"])) - pole.set("fromto", "0 0 0 0 0 " + str(context["pole_length"])) - cart = mjcf.find("./worldbody/body/geom") - cart.set("mass", str(context["masscart"])) - force = mjcf.find("./actuator/motor") - gear = force.get("gear") - force.set("gear", str(int(float(gear) * context["actuator_strength"]))) - keys = [] - options = mjcf.findall("./option") - gravity = " ".join(["0", "0", str(context["gravity"])]) - wind = " ".join([str(context["wind_x"]), str(context["wind_y"]), str(context["wind_z"])]) - for option in options: - for k, v in option.items(): - keys.append(k) - if k == "gravity": - option.set("gravity", gravity) - elif k == "timestep": - option.set("timestep", str(context["timestep"])) - elif k == "wind": - option.set("wind", wind) - if "gravity" not in keys: - mjcf.append(etree.Element("option", gravity=gravity)) - if "timestep" not in keys: - mjcf.append(etree.Element("option", timestep=str(context["timestep"]))) - if "wind" not in keys: - mjcf.append(etree.Element("option", wind=wind)) - xml_string = etree.tostring(mjcf, pretty_print=True) - - physics = Physics.from_xml_string(xml_string, assets) - task = Balance(swing_up=True, sparse=False, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, time_limit=time_limit, **environment_kwargs) - - -def _make_model(n_poles): - """Generates an xml string defining a cart with `n_poles` bodies.""" - xml_string = common.read_model('cartpole.xml') - if n_poles == 1: - return xml_string - mjcf = etree.fromstring(xml_string) - parent = mjcf.find('./worldbody/body/body') # Find first pole. - # Make chain of poles. - for pole_index in range(2, n_poles+1): - child = etree.Element('body', name='pole_{}'.format(pole_index), - pos='0 0 1', childclass='pole') - etree.SubElement(child, 'joint', name='hinge_{}'.format(pole_index)) - etree.SubElement(child, 'geom', name='pole_{}'.format(pole_index)) - parent.append(child) - parent = child - # Move plane down. - floor = mjcf.find('./worldbody/geom') - floor.set('pos', '0 0 {}'.format(1 - n_poles - .05)) - # Move cameras back. 
- cameras = mjcf.findall('./worldbody/camera') - cameras[0].set('pos', '0 {} 1'.format(-1 - 2*n_poles)) - cameras[1].set('pos', '0 {} 2'.format(-2*n_poles)) - return etree.tostring(mjcf, pretty_print=True) - - -class Physics(mujoco.Physics): - """Physics simulation with additional features for the Cartpole domain.""" - - def cart_position(self): - """Returns the position of the cart.""" - return self.named.data.qpos['slider'][0] - - def angular_vel(self): - """Returns the angular velocity of the pole.""" - return self.data.qvel[1:] - - def pole_angle_cosine(self): - """Returns the cosine of the pole angle.""" - return self.named.data.xmat[2:, 'zz'] - - def bounded_position(self): - """Returns the state, with pole angle split into sin/cos.""" - return np.hstack((self.cart_position(), - self.named.data.xmat[2:, ['zz', 'xz']].ravel())) - - -class Balance(base.Task): - """A Cartpole `Task` to balance the pole. - State is initialized either close to the target configuration or at a random - configuration. - """ - _CART_RANGE = (-.25, .25) - _ANGLE_COSINE_RANGE = (.995, 1) - - def __init__(self, swing_up, sparse, random=None): - """Initializes an instance of `Balance`. - Args: - swing_up: A `bool`, which if `True` sets the cart to the middle of the - slider and the pole pointing towards the ground. Otherwise, sets the - cart to a random position on the slider and the pole to a random - near-vertical position. - sparse: A `bool`, whether to return a sparse or a smooth reward. - random: Optional, either a `numpy.random.RandomState` instance, an - integer seed for creating a new `RandomState`, or None to select a seed - automatically (default). - """ - self._sparse = sparse - self._swing_up = swing_up - super().__init__(random=random) - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. - Initializes the cart and pole according to `swing_up`, and in both cases - adds a small random initial velocity to break symmetry. - Args: - physics: An instance of `Physics`. 
- """ - nv = physics.model.nv - if self._swing_up: - physics.named.data.qpos['slider'] = .01*self.random.randn() - physics.named.data.qpos['hinge_1'] = np.pi + .01*self.random.randn() - physics.named.data.qpos[2:] = .1*self.random.randn(nv - 2) - else: - physics.named.data.qpos['slider'] = self.random.uniform(-.1, .1) - physics.named.data.qpos[1:] = self.random.uniform(-.034, .034, nv - 1) - physics.named.data.qvel[:] = 0.01 * self.random.randn(physics.model.nv) - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation of the (bounded) physics state.""" - obs = collections.OrderedDict() - obs['position'] = physics.bounded_position() - obs['velocity'] = physics.velocity() - return obs - - def _get_reward(self, physics, sparse): - if sparse: - cart_in_bounds = rewards.tolerance(physics.cart_position(), - self._CART_RANGE) - angle_in_bounds = rewards.tolerance(physics.pole_angle_cosine(), - self._ANGLE_COSINE_RANGE).prod() - return cart_in_bounds * angle_in_bounds - else: - upright = (physics.pole_angle_cosine() + 1) / 2 - centered = rewards.tolerance(physics.cart_position(), margin=2) - centered = (1 + centered) / 2 - small_control = rewards.tolerance(physics.control(), margin=1, - value_at_margin=0, - sigmoid='quadratic')[0] - small_control = (4 + small_control) / 5 - small_velocity = rewards.tolerance(physics.angular_vel(), margin=5).min() - small_velocity = (1 + small_velocity) / 2 - return upright.mean() * small_control * small_velocity * centered - - def get_reward(self, physics): - """Returns a sparse or a smooth reward, as specified in the constructor.""" - return self._get_reward(physics, sparse=self._sparse) \ No newline at end of file diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 2988fe4a..c19604ed 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -2,7 +2,7 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import cartpole, walker, quadruped, fish +from carl.envs.dmc.dmc_tasks import walker, quadruped, fish _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} diff --git a/carl/envs/dmc/tasks.py b/carl/envs/dmc/tasks.py deleted file mode 100644 index 25dd9b39..00000000 --- a/carl/envs/dmc/tasks.py +++ /dev/null @@ -1,47 +0,0 @@ -from dm_control import suite - -from carl.envs.dmc.carl_dmcontrol import CARLDmc - - -def load_dmc_env(domain_name, task_name, task_kwargs=None, environment_kwargs=None, - visualize_reward=False): - return suite.load( - domain_name=domain_name, - task_name=task_name, - task_kwargs=task_kwargs, - environment_kwargs=environment_kwargs, - visualize_reward=visualize_reward, - ) - - -def load_dmc_cartpole(): - return load_dmc_env(domain_name="cartpole", task_name="swingup") - -# TODO Find a good method how to define tasks. Define classes? Better, create an automatic class constructor - - -if __name__ == "__main__": - # Load one task: - env = load_dmc_cartpole() - carl_env = CARLDmc(env=env) - - s = carl_env.reset() - done = False - while not done: - action = carl_env.action_space.sample() - state, reward, done, info = carl_env.step(action=action) - print(reward, done) - - # # Iterate over a task set: - # for domain_name, task_name in suite.BENCHMARKING: - # env = suite.load(domain_name, task_name) - # - # # Step through an episode and print out reward, discount and observation. 
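For reference, a minimal usage sketch (illustrative only, not part of the patch) of the path that replaces the removed tasks.py helper: after this series, contextual dm_control environments are built through carl.envs.dmc.loader.load_dmc_env, which forwards context and context_mask to the *_context task factories, and are then wrapped for the gym interface. The particular context values are assumptions based on the walker defaults introduced in this series.

from carl.envs import CARLDmcWalkerEnv_defaults as walker_default
from carl.envs.dmc.loader import load_dmc_env
from carl.envs.dmc.wrappers import MujocoToGymWrapper

# Scale the walker's actuator strength while keeping all other defaults.
context = walker_default.copy()
context["actuator_strength"] = walker_default["actuator_strength"] * 2

env = load_dmc_env(
    domain_name="walker",
    task_name="walk_context",
    context=context,
    context_mask=[],  # empty mask: every context feature is written into the MJCF model
    environment_kwargs={"flat_observation": True},
)
env = MujocoToGymWrapper(env)  # exposes gym-style reset()/step()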
- # action_spec = env.action_spec() - # time_step = env.reset() - # while not time_step.last(): - # action = np.random.uniform( - # action_spec.minimum, action_spec.maximum, size=action_spec.shape - # ) - # time_step = env.step(action) - # print(time_step.reward, time_step.discount, time_step.observation) diff --git a/test_dm_control.py b/test_dm_control.py index 2534f7ad..fc48dd62 100644 --- a/test_dm_control.py +++ b/test_dm_control.py @@ -1,13 +1,10 @@ import imp -from carl.envs import CARLDmcCartPoleEnv from carl.envs import CARLDmcWalkerEnv from carl.envs import CARLDmcQuadrupedEnv from carl.envs import CARLDmcFishEnv -from carl.envs import CARLDmcCartPoleEnv_defaults as cartpole_default from carl.envs import CARLDmcWalkerEnv_defaults as walker_default from carl.envs import CARLDmcQuadrupedEnv_defaults as quadruped_default from carl.envs import CARLDmcFishEnv_defaults as fish_default -#from carl.envs import CARLDmcCartPoleEnv_mask as cartpole_default from carl.envs import CARLDmcWalkerEnv_mask as walker_mask from carl.envs import CARLDmcQuadrupedEnv_mask as quadruped_mask from carl.envs import CARLDmcFishEnv_mask as fish_mask @@ -15,24 +12,19 @@ if __name__ == "__main__": # Load one task: - - # longer_pole = cartpole_default.copy() - # longer_pole["pole_length"] = cartpole_default["pole_length"]*2 - # contexts = {0: longer_pole} - # carl_env = CARLDmcCartPoleEnv(task="swingup_context", contexts=contexts, hide_context=False) - # stronger_act = walker_default.copy() - # stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 - # contexts = {0: stronger_act} - # carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False) + stronger_act = walker_default.copy() + stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 + contexts = {0: stronger_act} + carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False) # stronger_act = quadruped_default.copy() # stronger_act["actuator_strength"] = quadruped_default["actuator_strength"]*2 # contexts = {0: stronger_act} # carl_env = CARLDmcQuadrupedEnv(task="walk_context", contexts=contexts, context_mask=quadruped_mask, hide_context=False) - contexts = {0: fish_default} - carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, context_mask=fish_mask, hide_context=False) + # contexts = {0: fish_default} + # carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, context_mask=fish_mask, hide_context=False) render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) s = carl_env.reset() From af933201989dc9e143dce05350ab329b35cec05d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Tue, 21 Jun 2022 11:43:03 +0200 Subject: [PATCH 25/37] formatting --- carl/envs/dmc/carl_dm_fish.py | 24 +- carl/envs/dmc/carl_dm_quadruped.py | 20 +- carl/envs/dmc/carl_dm_walker.py | 22 +- carl/envs/dmc/carl_dmcontrol.py | 133 +---- carl/envs/dmc/dmc_tasks/fish.py | 226 ++++---- carl/envs/dmc/dmc_tasks/quadruped.py | 746 +++++++++++++-------------- carl/envs/dmc/dmc_tasks/utils.py | 256 ++++----- carl/envs/dmc/dmc_tasks/walker.py | 189 ++++--- carl/envs/dmc/loader.py | 3 +- carl/envs/dmc/wrappers.py | 4 +- 10 files changed, 769 insertions(+), 854 deletions(-) diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 2960aa17..d557295f 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -10,17 +10,17 @@ DEFAULT_CONTEXT = { - "gravity": 
-9.81, # Gravity is disabled via flag - "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) + "gravity": -9.81, # Gravity is disabled via flag + "friction_tangential": 1, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.004, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints + "joint_damping": 1., # Scaling factor for all joints "joint_stiffness": 0., - "actuator_strength": 1, # Scaling factor for all actuators in the model - "density": 6000., + "actuator_strength": 1, # Scaling factor for all actuators in the model + "density": 5000., "viscosity": 0., - "geom_density": 1., # No effect, because no gravity + "geom_density": 1., # No effect, because no gravity "wind_x": 0., "wind_y": 0., "wind_z": 0., @@ -78,7 +78,13 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, context_mask=[], environment_kwargs={"flat_observation": True}) + env = load_dmc_env( + domain_name=domain, + task_name=task, + context={}, + context_mask=[], + environment_kwargs={"flat_observation": True} + ) env = MujocoToGymWrapper(env) super().__init__( env=env, diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 6cdd619a..95dd149b 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -11,16 +11,16 @@ DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) + "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.005, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints + "joint_damping": 1., # Scaling factor for all joints "joint_stiffness": 0., - "actuator_strength": 1, # Scaling factor for all actuators in the model + "actuator_strength": 1, # Scaling factor for all actuators in the model "density": 0., "viscosity": 0., - "geom_density": 1., # Scaling factor for all geom (objects) densities + "geom_density": 1., # Scaling factor for all geom (objects) densities "wind_x": 0., "wind_y": 0., "wind_z": 0., @@ -76,7 +76,13 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, context_mask=[], environment_kwargs={"flat_observation": True}) + env = load_dmc_env( + domain_name=domain, + task_name=task, + context={}, + context_mask=[], + environment_kwargs={"flat_observation": True} + ) env = MujocoToGymWrapper(env) super().__init__( env=env, diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index bd7320e9..0083866c 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -1,4 
+1,3 @@ -from ast import JoinedStr from typing import Any, Dict, List, Optional, Union import numpy as np @@ -12,16 +11,16 @@ DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) + "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.0025, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints + "joint_damping": 1., # Scaling factor for all joints "joint_stiffness": 0., - "actuator_strength": 1., # Scaling factor for all actuators in the model + "actuator_strength": 1., # Scaling factor for all actuators in the model "density": 0., "viscosity": 0., - "geom_density": 1., # Scaling factor for all geom (objects) densities + "geom_density": 1., # Scaling factor for all geom (objects) densities "wind_x": 0., "wind_y": 0., "wind_z": 0., @@ -50,6 +49,7 @@ "wind_z", ] + class CARLDmcWalkerEnv(CARLDmcEnv): def __init__( self, @@ -76,7 +76,13 @@ def __init__( if dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=domain, task_name=task, context={}, context_mask=[], environment_kwargs={"flat_observation": True}) + env = load_dmc_env( + domain_name=domain, + task_name=task, + context={}, + context_mask=[], + environment_kwargs={"flat_observation": True} + ) env = MujocoToGymWrapper(env) super().__init__( env=env, diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index 47e35947..c3c7f056 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -1,13 +1,6 @@ -import warnings -from typing import Any, Dict, List, TypeVar, Union, Optional - -import inspect -import numpy as np -from dm_control import suite -from dm_control.utils import containers +from typing import Any, Dict, List, Union, Optional import gym -from gym.envs.classic_control import CartPoleEnv from carl.envs.carl_env import CARLEnv from carl.envs.dmc.wrappers import MujocoToGymWrapper @@ -15,122 +8,6 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector -from carl.envs.dmc.wrappers import ActType, ObsType - -# ObsType = TypeVar("ObsType") -# ActType = TypeVar("ActType") - - -""" -Physics options (defaults for CartPole): -| apirate = 100.0 │ -│ collision = 0 │ -│ cone = 0 │ -│ density = 0.0 │ -│ disableflags = 0 │ -│ enableflags = 0 │ -│ gravity = array([ 0. , 0. , -9.81]) │ -│ impratio = 1.0 │ -│ integrator = 0 │ -│ iterations = 100 │ -│ jacobian = 2 │ -│ magnetic = array([ 0. , -0.5, 0. ]) │ -│ mpr_iterations = 50 │ -│ mpr_tolerance = 1e-06 │ -│ noslip_iterations = 0 │ -│ noslip_tolerance = 1e-06 │ -│ o_margin = 0.0 │ -│ o_solimp = array([9.0e-01, 9.5e-01, 1.0e-03, 5.0e-01, 2.0e+00]) │ -│ o_solref = array([0.02, 1. 
]) │ -│ solver = 2 │ -│ timestep = 0.0025 │ -│ tolerance = 1e-08 │ -│ viscosity = 0.0 │ -│ wind = array([0., 0., 0.]) | - - -C++ Implementation: -https://mujoco.readthedocs.io/en/latest/APIreference.html#mjoption -struct _mjOption // physics options -{ - // timing parameters - mjtNum timestep; // timestep - mjtNum apirate; // update rate for remote API (Hz) - - // solver parameters - mjtNum impratio; // ratio of friction-to-normal contact impedance - mjtNum tolerance; // main solver tolerance - mjtNum noslip_tolerance; // noslip solver tolerance - mjtNum mpr_tolerance; // MPR solver tolerance - - // physical constants - mjtNum gravity[3]; // gravitational acceleration - mjtNum wind[3]; // wind (for lift, drag and viscosity) - mjtNum magnetic[3]; // global magnetic flux - mjtNum density; // density of medium - mjtNum viscosity; // viscosity of medium - - // override contact solver parameters (if enabled) - mjtNum o_margin; // margin - mjtNum o_solref[mjNREF]; // solref - mjtNum o_solimp[mjNIMP]; // solimp - - // discrete settings - int integrator; // integration mode (mjtIntegrator) - int collision; // collision mode (mjtCollision) - int cone; // type of friction cone (mjtCone) - int jacobian; // type of Jacobian (mjtJacobian) - int solver; // solver algorithm (mjtSolver) - int iterations; // maximum number of main solver iterations - int noslip_iterations; // maximum number of noslip solver iterations - int mpr_iterations; // maximum number of MPR solver iterations - int disableflags; // bit flags for disabling standard features - int enableflags; // bit flags for enabling optional features -}; -typedef struct _mjOption mjOption; -""" -TIMING_PARAMETERS = [ - "timestep", # timestep - "apirate", # update rate for remote API (Hz) -] -SOLVER_PARAMETERS = [ - "impratio", # ratio of friction-to-normal contact impedance - "tolerance", # main solver tolerance - "noslip_tolerance", # noslip solver tolerance - "mpr_tolerance", # MPR solver tolerance -] -PHYSICAL_CONSTANTS = [ - "gravity", - "wind", - "magnetic", - "density", - "viscosity", -] -OVERRIDE_CONTACT_SOLVER_PARAMETERS = [ # (if enabled) - "o_margin", # margin - "o_solref", # solref - "o_solimp", # solimp -] -DISCRETE_SETTINGS = [ - "integrator", # integration mode (mjtIntegrator) - "collision", # collision mode (mjtCollision) - "cone", # type of friction cone (mjtCone) - "jacobian", # type of Jacobian (mjtJacobian) - "solver", # solver algorithm (mjtSolver) - "iterations", # maximum number of main solver iterations - "noslip_iterations", # maximum number of noslip solver iterations - "mpr_iterations", # maximum number of MPR solver iterations - "disableflags", # bit flags for disabling standard features - "enableflags", # bit flags for enabling optional features -] - -WORLD_PARAMETERS = ( - TIMING_PARAMETERS - + SOLVER_PARAMETERS - + PHYSICAL_CONSTANTS - + OVERRIDE_CONTACT_SOLVER_PARAMETERS - + TIMING_PARAMETERS -) class CARLDmcEnv(CARLEnv): def __init__( @@ -173,5 +50,11 @@ def _update_context(self) -> None: if self.dict_observation_space: raise NotImplementedError else: - env = load_dmc_env(domain_name=self.domain, task_name=self.task, context=self.context, context_mask=self.context_mask, environment_kwargs={"flat_observation": True}) + env = load_dmc_env( + domain_name=self.domain, + task_name=self.task, + context=self.context, + context_mask=self.context_mask, + environment_kwargs={"flat_observation": True} + ) self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/dmc_tasks/fish.py b/carl/envs/dmc/dmc_tasks/fish.py index 
e1bd2060..869511f0 100644 --- a/carl/envs/dmc/dmc_tasks/fish.py +++ b/carl/envs/dmc/dmc_tasks/fish.py @@ -40,138 +40,138 @@ def get_model_and_assets(): - """Returns a tuple containing the model XML string and a dict of assets.""" - return common.read_model('fish.xml'), common.ASSETS + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('fish.xml'), common.ASSETS @SUITE.add('benchmarking') def upright_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, - environment_kwargs=None): - """Returns the Fish Upright task.""" - xml_string, assets = get_model_and_assets() - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, assets) - task = Upright(random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, - **environment_kwargs) + environment_kwargs=None): + """Returns the Fish Upright task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, assets) + task = Upright(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) @SUITE.add('benchmarking') def swim_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Fish Swim task.""" - xml_string, assets = get_model_and_assets() - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, assets) - task = Swim(random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, - **environment_kwargs) + """Returns the Fish Swim task.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, assets) + task = Swim(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) class Physics(mujoco.Physics): - """Physics simulation with additional features for the Fish domain.""" + """Physics simulation with additional features for the Fish domain.""" - def upright(self): - """Returns projection from z-axes of torso to the z-axes of worldbody.""" - return self.named.data.xmat['torso', 'zz'] + def upright(self): + """Returns projection from z-axes of torso to the z-axes of worldbody.""" + return self.named.data.xmat['torso', 'zz'] - def torso_velocity(self): - """Returns velocities and angular velocities of the torso.""" - return self.data.sensordata + def torso_velocity(self): + """Returns velocities and angular velocities of the torso.""" + return self.data.sensordata - def joint_velocities(self): - """Returns the joint velocities.""" - return self.named.data.qvel[_JOINTS] + def joint_velocities(self): + """Returns the joint velocities.""" + return self.named.data.qvel[_JOINTS] - def joint_angles(self): - """Returns the 
joint positions.""" - return self.named.data.qpos[_JOINTS] + def joint_angles(self): + """Returns the joint positions.""" + return self.named.data.qpos[_JOINTS] - def mouth_to_target(self): - """Returns a vector, from mouth to target in local coordinate of mouth.""" - data = self.named.data - mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth'] - return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3)) + def mouth_to_target(self): + """Returns a vector, from mouth to target in local coordinate of mouth.""" + data = self.named.data + mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth'] + return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3)) class Upright(base.Task): - """A Fish `Task` for getting the torso upright with smooth reward.""" - - def __init__(self, random=None): - """Initializes an instance of `Upright`. - Args: - random: Either an existing `numpy.random.RandomState` instance, an - integer seed for creating a new `RandomState`, or None to select a seed - automatically. - """ - super().__init__(random=random) - - def initialize_episode(self, physics): - """Randomizes the tail and fin angles and the orientation of the Fish.""" - quat = self.random.randn(4) - physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) - for joint in _JOINTS: - physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) - # Hide the target. It's irrelevant for this task. - physics.named.model.geom_rgba['target', 3] = 0 - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation of joint angles, velocities and uprightness.""" - obs = collections.OrderedDict() - obs['joint_angles'] = physics.joint_angles() - obs['upright'] = physics.upright() - obs['velocity'] = physics.velocity() - return obs - - def get_reward(self, physics): - """Returns a smooth reward.""" - return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1) + """A Fish `Task` for getting the torso upright with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Upright`. + Args: + random: Either an existing `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically. + """ + super().__init__(random=random) + + def initialize_episode(self, physics): + """Randomizes the tail and fin angles and the orientation of the Fish.""" + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Hide the target. It's irrelevant for this task. + physics.named.model.geom_rgba['target', 3] = 0 + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joint angles, velocities and uprightness.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + return rewards.tolerance(physics.upright(), bounds=(1, 1), margin=1) class Swim(base.Task): - """A Fish `Task` for swimming with smooth reward.""" - - def __init__(self, random=None): - """Initializes an instance of `Swim`. - Args: - random: Optional, either a `numpy.random.RandomState` instance, an - integer seed for creating a new `RandomState`, or None to select a seed - automatically (default). 
- """ - super().__init__(random=random) - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode.""" - - quat = self.random.randn(4) - physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) - for joint in _JOINTS: - physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) - # Randomize target position. - physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) - physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) - physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation of joints, target direction and velocities.""" - obs = collections.OrderedDict() - obs['joint_angles'] = physics.joint_angles() - obs['upright'] = physics.upright() - obs['target'] = physics.mouth_to_target() - obs['velocity'] = physics.velocity() - return obs - - def get_reward(self, physics): - """Returns a smooth reward.""" - radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() - in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), - bounds=(0, radii), margin=2*radii) - is_upright = 0.5 * (physics.upright() + 1) - return (7*in_target + is_upright) / 8 \ No newline at end of file + """A Fish `Task` for swimming with smooth reward.""" + + def __init__(self, random=None): + """Initializes an instance of `Swim`. + Args: + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode.""" + + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Randomize target position. 
+ physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of joints, target direction and velocities.""" + obs = collections.OrderedDict() + obs['joint_angles'] = physics.joint_angles() + obs['upright'] = physics.upright() + obs['target'] = physics.mouth_to_target() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a smooth reward.""" + radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() + in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), + bounds=(0, radii), margin=2*radii) + is_upright = 0.5 * (physics.upright() + 1) + return (7*in_target + is_upright) / 8 diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py index 9f327ac6..6b865c07 100644 --- a/carl/envs/dmc/dmc_tasks/quadruped.py +++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -55,421 +55,421 @@ def make_model(floor_size=None, terrain=False, rangefinders=False, walls_and_ball=False): - """Returns the model XML string.""" - xml_string = common.read_model('quadruped.xml') - parser = etree.XMLParser(remove_blank_text=True) - mjcf = etree.XML(xml_string, parser) + """Returns the model XML string.""" + xml_string = common.read_model('quadruped.xml') + parser = etree.XMLParser(remove_blank_text=True) + mjcf = etree.XML(xml_string, parser) - # Set floor size. - if floor_size is not None: - floor_geom = mjcf.find('.//geom[@name=\'floor\']') - floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5' + # Set floor size. + if floor_size is not None: + floor_geom = mjcf.find('.//geom[@name=\'floor\']') + floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5' - # Remove walls, ball and target. - if not walls_and_ball: - for wall in _WALLS: - wall_geom = xml_tools.find_element(mjcf, 'geom', wall) - wall_geom.getparent().remove(wall_geom) + # Remove walls, ball and target. + if not walls_and_ball: + for wall in _WALLS: + wall_geom = xml_tools.find_element(mjcf, 'geom', wall) + wall_geom.getparent().remove(wall_geom) - # Remove ball. - ball_body = xml_tools.find_element(mjcf, 'body', 'ball') - ball_body.getparent().remove(ball_body) + # Remove ball. + ball_body = xml_tools.find_element(mjcf, 'body', 'ball') + ball_body.getparent().remove(ball_body) - # Remove target. - target_site = xml_tools.find_element(mjcf, 'site', 'target') - target_site.getparent().remove(target_site) + # Remove target. + target_site = xml_tools.find_element(mjcf, 'site', 'target') + target_site.getparent().remove(target_site) - # Remove terrain. - if not terrain: - terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain') - terrain_geom.getparent().remove(terrain_geom) + # Remove terrain. + if not terrain: + terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain') + terrain_geom.getparent().remove(terrain_geom) - # Remove rangefinders if they're not used, as range computations can be - # expensive, especially in a scene with heightfields. - if not rangefinders: - rangefinder_sensors = mjcf.findall('.//rangefinder') - for rf in rangefinder_sensors: - rf.getparent().remove(rf) + # Remove rangefinders if they're not used, as range computations can be + # expensive, especially in a scene with heightfields. 
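The *_context factories in this file route the model XML through adapt_context (carl/envs/dmc/dmc_tasks/utils.py, patched earlier in this series). As a reading aid for that fragmented hunk, a minimal sketch of the find-or-create <option> pattern it applies; the helper name is hypothetical, only a subset of the handled attributes is shown, and (as in adapt_context) the context is assumed to contain every key that is not masked.

from lxml import etree

def set_scalar_options(xml_string, context, context_mask=()):
    # Hypothetical distillation of the adapt_context pattern, not the CARL code.
    mjcf = etree.fromstring(xml_string)
    option = mjcf.find(".//option")
    if option is None:  # MJCF models without an <option> element get one appended
        option = etree.Element("option")
        mjcf.append(option)
    for key in ("timestep", "density", "viscosity"):
        if key not in context_mask:
            option.set(key, str(context[key]))
    return etree.tostring(mjcf, pretty_print=True)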
+ if not rangefinders: + rangefinder_sensors = mjcf.findall('.//rangefinder') + for rf in rangefinder_sensors: + rf.getparent().remove(rf) - return etree.tostring(mjcf, pretty_print=True) + return etree.tostring(mjcf, pretty_print=True) @SUITE.add() def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Walk task with the adapted context.""" - xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, common.ASSETS) - task = Move(desired_speed=_WALK_SPEED, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Walk task with the adapted context.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) @SUITE.add() def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Run task with the adapted context.""" - xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, common.ASSETS) - task = Move(desired_speed=_RUN_SPEED, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Run task with the adapted context.""" + xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Move(desired_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) @SUITE.add() def escape_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, - environment_kwargs=None): - """Returns the Escape task with the adapted context.""" - xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, common.ASSETS) - task = Escape(random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + environment_kwargs=None): + """Returns the Escape task with the adapted context.""" + xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, 
context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Escape(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) @SUITE.add() def fetch_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Fetch task with the adapted context.""" - xml_string = make_model(walls_and_ball=True) - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, common.ASSETS) - task = Fetch(random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Fetch task with the adapted context.""" + xml_string = make_model(walls_and_ball=True) + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, common.ASSETS) + task = Fetch(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) class Physics(mujoco.Physics): - """Physics simulation with additional features for the Quadruped domain.""" - - def _reload_from_data(self, data): - super()._reload_from_data(data) - # Clear cached sensor names when the physics is reloaded. - self._sensor_types_to_names = {} - self._hinge_names = [] - - def _get_sensor_names(self, *sensor_types): - try: - sensor_names = self._sensor_types_to_names[sensor_types] - except KeyError: - [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types)) - sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids] - self._sensor_types_to_names[sensor_types] = sensor_names - return sensor_names - - def torso_upright(self): - """Returns the dot-product of the torso z-axis and the global z-axis.""" - return np.asarray(self.named.data.xmat['torso', 'zz']) - - def torso_velocity(self): - """Returns the velocity of the torso, in the local frame.""" - return self.named.data.sensordata['velocimeter'].copy() - - def egocentric_state(self): - """Returns the state without global orientation or position.""" - if not self._hinge_names: - [hinge_ids] = np.nonzero(self.model.jnt_type == - enums.mjtJoint.mjJNT_HINGE) - self._hinge_names = [self.model.id2name(j_id, 'joint') - for j_id in hinge_ids] - return np.hstack((self.named.data.qpos[self._hinge_names], - self.named.data.qvel[self._hinge_names], - self.data.act)) - - def toe_positions(self): - """Returns toe positions in egocentric frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] - torso_to_toe = self.named.data.xpos[_TOES] - torso_pos - return torso_to_toe.dot(torso_frame) - - def force_torque(self): - """Returns scaled force/torque sensor readings at the toes.""" - force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE, - enums.mjtSensor.mjSENS_TORQUE) - return np.arcsinh(self.named.data.sensordata[force_torque_sensors]) - - def imu(self): - """Returns IMU-like sensor readings.""" - imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO, - enums.mjtSensor.mjSENS_ACCELEROMETER) - return 
self.named.data.sensordata[imu_sensors] - - def rangefinder(self): - """Returns scaled rangefinder sensor readings.""" - rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER) - rf_readings = self.named.data.sensordata[rf_sensors] - no_intersection = -1.0 - return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings)) - - def origin_distance(self): - """Returns the distance from the origin to the workspace.""" - return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace'])) - - def origin(self): - """Returns origin position in the torso frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] - return -torso_pos.dot(torso_frame) - - def ball_state(self): - """Returns ball position and velocity relative to the torso frame.""" - data = self.named.data - torso_frame = data.xmat['torso'].reshape(3, 3) - ball_rel_pos = data.xpos['ball'] - data.xpos['torso'] - ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3] - ball_rot_vel = data.qvel['ball_root'][3:] - ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel)) - return ball_state.dot(torso_frame).ravel() - - def target_position(self): - """Returns target position in torso frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] - torso_to_target = self.named.data.site_xpos['target'] - torso_pos - return torso_to_target.dot(torso_frame) - - def ball_to_target_distance(self): - """Returns horizontal distance from the ball to the target.""" - ball_to_target = (self.named.data.site_xpos['target'] - - self.named.data.xpos['ball']) - return np.linalg.norm(ball_to_target[:2]) - - def self_to_ball_distance(self): - """Returns horizontal distance from the quadruped workspace to the ball.""" - self_to_ball = (self.named.data.site_xpos['workspace'] - -self.named.data.xpos['ball']) - return np.linalg.norm(self_to_ball[:2]) + """Physics simulation with additional features for the Quadruped domain.""" + + def _reload_from_data(self, data): + super()._reload_from_data(data) + # Clear cached sensor names when the physics is reloaded. 
+ self._sensor_types_to_names = {} + self._hinge_names = [] + + def _get_sensor_names(self, *sensor_types): + try: + sensor_names = self._sensor_types_to_names[sensor_types] + except KeyError: + [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types)) + sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids] + self._sensor_types_to_names[sensor_types] = sensor_names + return sensor_names + + def torso_upright(self): + """Returns the dot-product of the torso z-axis and the global z-axis.""" + return np.asarray(self.named.data.xmat['torso', 'zz']) + + def torso_velocity(self): + """Returns the velocity of the torso, in the local frame.""" + return self.named.data.sensordata['velocimeter'].copy() + + def egocentric_state(self): + """Returns the state without global orientation or position.""" + if not self._hinge_names: + [hinge_ids] = np.nonzero(self.model.jnt_type == + enums.mjtJoint.mjJNT_HINGE) + self._hinge_names = [self.model.id2name(j_id, 'joint') + for j_id in hinge_ids] + return np.hstack((self.named.data.qpos[self._hinge_names], + self.named.data.qvel[self._hinge_names], + self.data.act)) + + def toe_positions(self): + """Returns toe positions in egocentric frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_toe = self.named.data.xpos[_TOES] - torso_pos + return torso_to_toe.dot(torso_frame) + + def force_torque(self): + """Returns scaled force/torque sensor readings at the toes.""" + force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE, + enums.mjtSensor.mjSENS_TORQUE) + return np.arcsinh(self.named.data.sensordata[force_torque_sensors]) + + def imu(self): + """Returns IMU-like sensor readings.""" + imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO, + enums.mjtSensor.mjSENS_ACCELEROMETER) + return self.named.data.sensordata[imu_sensors] + + def rangefinder(self): + """Returns scaled rangefinder sensor readings.""" + rf_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_RANGEFINDER) + rf_readings = self.named.data.sensordata[rf_sensors] + no_intersection = -1.0 + return np.where(rf_readings == no_intersection, 1.0, np.tanh(rf_readings)) + + def origin_distance(self): + """Returns the distance from the origin to the workspace.""" + return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace'])) + + def origin(self): + """Returns origin position in the torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + return -torso_pos.dot(torso_frame) + + def ball_state(self): + """Returns ball position and velocity relative to the torso frame.""" + data = self.named.data + torso_frame = data.xmat['torso'].reshape(3, 3) + ball_rel_pos = data.xpos['ball'] - data.xpos['torso'] + ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3] + ball_rot_vel = data.qvel['ball_root'][3:] + ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel)) + return ball_state.dot(torso_frame).ravel() + + def target_position(self): + """Returns target position in torso frame.""" + torso_frame = self.named.data.xmat['torso'].reshape(3, 3) + torso_pos = self.named.data.xpos['torso'] + torso_to_target = self.named.data.site_xpos['target'] - torso_pos + return torso_to_target.dot(torso_frame) + + def ball_to_target_distance(self): + """Returns horizontal distance from the ball to the target.""" + ball_to_target = (self.named.data.site_xpos['target'] - + self.named.data.xpos['ball']) 
+ return np.linalg.norm(ball_to_target[:2]) + + def self_to_ball_distance(self): + """Returns horizontal distance from the quadruped workspace to the ball.""" + self_to_ball = (self.named.data.site_xpos['workspace'] + - self.named.data.xpos['ball']) + return np.linalg.norm(self_to_ball[:2]) def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0): - """Find a height with no contacts given a body orientation. - Args: - physics: An instance of `Physics`. - orientation: A quaternion. - x_pos: A float. Position along global x-axis. - y_pos: A float. Position along global y-axis. - Raises: - RuntimeError: If a non-contacting configuration has not been found after - 10,000 attempts. - """ - z_pos = 0.0 # Start embedded in the floor. - num_contacts = 1 - num_attempts = 0 - # Move up in 1cm increments until no contacts. - while num_contacts > 0: - try: - with physics.reset_context(): - physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos - physics.named.data.qpos['root'][3:] = orientation - except control.PhysicsError: - # We may encounter a PhysicsError here due to filling the contact - # buffer, in which case we simply increment the height and continue. - pass - num_contacts = physics.data.ncon - z_pos += 0.01 - num_attempts += 1 - if num_attempts > 10000: - raise RuntimeError('Failed to find a non-contacting configuration.') + """Find a height with no contacts given a body orientation. + Args: + physics: An instance of `Physics`. + orientation: A quaternion. + x_pos: A float. Position along global x-axis. + y_pos: A float. Position along global y-axis. + Raises: + RuntimeError: If a non-contacting configuration has not been found after + 10,000 attempts. + """ + z_pos = 0.0 # Start embedded in the floor. + num_contacts = 1 + num_attempts = 0 + # Move up in 1cm increments until no contacts. + while num_contacts > 0: + try: + with physics.reset_context(): + physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos + physics.named.data.qpos['root'][3:] = orientation + except control.PhysicsError: + # We may encounter a PhysicsError here due to filling the contact + # buffer, in which case we simply increment the height and continue. + pass + num_contacts = physics.data.ncon + z_pos += 0.01 + num_attempts += 1 + if num_attempts > 10000: + raise RuntimeError('Failed to find a non-contacting configuration.') def _common_observations(physics): - """Returns the observations common to all tasks.""" - obs = collections.OrderedDict() - obs['egocentric_state'] = physics.egocentric_state() - obs['torso_velocity'] = physics.torso_velocity() - obs['torso_upright'] = physics.torso_upright() - obs['imu'] = physics.imu() - obs['force_torque'] = physics.force_torque() - return obs + """Returns the observations common to all tasks.""" + obs = collections.OrderedDict() + obs['egocentric_state'] = physics.egocentric_state() + obs['torso_velocity'] = physics.torso_velocity() + obs['torso_upright'] = physics.torso_upright() + obs['imu'] = physics.imu() + obs['force_torque'] = physics.force_torque() + return obs def _upright_reward(physics, deviation_angle=0): - """Returns a reward proportional to how upright the torso is. - Args: - physics: an instance of `Physics`. - deviation_angle: A float, in degrees. The reward is 0 when the torso is - exactly upside-down and 1 when the torso's z-axis is less than - `deviation_angle` away from the global z-axis. 
- """ - deviation = np.cos(np.deg2rad(deviation_angle)) - return rewards.tolerance( - physics.torso_upright(), - bounds=(deviation, float('inf')), - sigmoid='linear', - margin=1 + deviation, - value_at_margin=0) - - -class Move(base.Task): - """A quadruped task solved by moving forward at a designated speed.""" - - def __init__(self, desired_speed, random=None): - """Initializes an instance of `Move`. - Args: - desired_speed: A float. If this value is zero, reward is given simply - for standing upright. Otherwise this specifies the horizontal velocity - at which the velocity-dependent reward component is maximized. - random: Optional, either a `numpy.random.RandomState` instance, an - integer seed for creating a new `RandomState`, or None to select a seed - automatically (default). - """ - self._desired_speed = desired_speed - super().__init__(random=random) - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. + """Returns a reward proportional to how upright the torso is. Args: - physics: An instance of `Physics`. + physics: an instance of `Physics`. + deviation_angle: A float, in degrees. The reward is 0 when the torso is + exactly upside-down and 1 when the torso's z-axis is less than + `deviation_angle` away from the global z-axis. """ - # Initial configuration. - orientation = self.random.randn(4) - orientation /= np.linalg.norm(orientation) - _find_non_contacting_height(physics, orientation) - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation to the agent.""" - return _common_observations(physics) - - def get_reward(self, physics): - """Returns a reward to the agent.""" + deviation = np.cos(np.deg2rad(deviation_angle)) + return rewards.tolerance( + physics.torso_upright(), + bounds=(deviation, float('inf')), + sigmoid='linear', + margin=1 + deviation, + value_at_margin=0) - # Move reward term. - move_reward = rewards.tolerance( - physics.torso_velocity()[0], - bounds=(self._desired_speed, float('inf')), - margin=self._desired_speed, - value_at_margin=0.5, - sigmoid='linear') - return _upright_reward(physics) * move_reward +class Move(base.Task): + """A quadruped task solved by moving forward at a designated speed.""" + + def __init__(self, desired_speed, random=None): + """Initializes an instance of `Move`. + Args: + desired_speed: A float. If this value is zero, reward is given simply + for standing upright. Otherwise this specifies the horizontal velocity + at which the velocity-dependent reward component is maximized. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._desired_speed = desired_speed + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + Args: + physics: An instance of `Physics`. + """ + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + return _common_observations(physics) + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Move reward term. 
+ move_reward = rewards.tolerance( + physics.torso_velocity()[0], + bounds=(self._desired_speed, float('inf')), + margin=self._desired_speed, + value_at_margin=0.5, + sigmoid='linear') + + return _upright_reward(physics) * move_reward class Escape(base.Task): - """A quadruped task solved by escaping a bowl-shaped terrain.""" - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. - Args: - physics: An instance of `Physics`. - """ - # Get heightfield resolution, assert that it is square. - res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] - assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] - # Sinusoidal bowl shape. - row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] - radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) - bowl_shape = .5 - np.cos(2*np.pi*radius)/2 - # Random smooth bumps. - terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] - bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) - bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res)) - smooth_bumps = ndimage.zoom(bumps, res / float(bump_res)) - # Terrain is elementwise product. - terrain = bowl_shape * smooth_bumps - start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] - physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() - super().initialize_episode(physics) - - # If we have a rendering context, we need to re-upload the modified - # heightfield data. - if physics.contexts: - with physics.contexts.gl.make_current() as ctx: - ctx.call(mjlib.mjr_uploadHField, - physics.model.ptr, - physics.contexts.mujoco.ptr, - _HEIGHTFIELD_ID) - - # Initial configuration. - orientation = self.random.randn(4) - orientation /= np.linalg.norm(orientation) - _find_non_contacting_height(physics, orientation) - - def get_observation(self, physics): - """Returns an observation to the agent.""" - obs = _common_observations(physics) - obs['origin'] = physics.origin() - obs['rangefinder'] = physics.rangefinder() - return obs - - def get_reward(self, physics): - """Returns a reward to the agent.""" - - # Escape reward term. - terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0] - escape_reward = rewards.tolerance( - physics.origin_distance(), - bounds=(terrain_size, float('inf')), - margin=terrain_size, - value_at_margin=0, - sigmoid='linear') - - return _upright_reward(physics, deviation_angle=20) * escape_reward + """A quadruped task solved by escaping a bowl-shaped terrain.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + Args: + physics: An instance of `Physics`. + """ + # Get heightfield resolution, assert that it is square. + res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] + assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] + # Sinusoidal bowl shape. + row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] + radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) + bowl_shape = .5 - np.cos(2*np.pi*radius)/2 + # Random smooth bumps. + terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) + bumps = self.random.uniform(_TERRAIN_SMOOTHNESS, 1, (bump_res, bump_res)) + smooth_bumps = ndimage.zoom(bumps, res / float(bump_res)) + # Terrain is elementwise product. 
+ terrain = bowl_shape * smooth_bumps + start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] + physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() + super().initialize_episode(physics) + + # If we have a rendering context, we need to re-upload the modified + # heightfield data. + if physics.contexts: + with physics.contexts.gl.make_current() as ctx: + ctx.call(mjlib.mjr_uploadHField, + physics.model.ptr, + physics.contexts.mujoco.ptr, + _HEIGHTFIELD_ID) + + # Initial configuration. + orientation = self.random.randn(4) + orientation /= np.linalg.norm(orientation) + _find_non_contacting_height(physics, orientation) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['origin'] = physics.origin() + obs['rangefinder'] = physics.rangefinder() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Escape reward term. + terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0] + escape_reward = rewards.tolerance( + physics.origin_distance(), + bounds=(terrain_size, float('inf')), + margin=terrain_size, + value_at_margin=0, + sigmoid='linear') + + return _upright_reward(physics, deviation_angle=20) * escape_reward class Fetch(base.Task): - """A quadruped task solved by bringing a ball to the origin.""" - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. - Args: - physics: An instance of `Physics`. - """ - # Initial configuration, random azimuth and horizontal position. - azimuth = self.random.uniform(0, 2*np.pi) - orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2))) - spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0] - x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,)) - _find_non_contacting_height(physics, orientation, x_pos, y_pos) - - # Initial ball state. - physics.named.data.qpos['ball_root'][:2] = self.random.uniform( - -spawn_radius, spawn_radius, size=(2,)) - physics.named.data.qpos['ball_root'][2] = 2 - physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2) - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation to the agent.""" - obs = _common_observations(physics) - obs['ball_state'] = physics.ball_state() - obs['target_position'] = physics.target_position() - return obs - - def get_reward(self, physics): - """Returns a reward to the agent.""" - - # Reward for moving close to the ball. - arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2) - workspace_radius = physics.named.model.site_size['workspace', 0] - ball_radius = physics.named.model.geom_size['ball', 0] - reach_reward = rewards.tolerance( - physics.self_to_ball_distance(), - bounds=(0, workspace_radius+ball_radius), - sigmoid='linear', - margin=arena_radius, value_at_margin=0) - - # Reward for bringing the ball to the target. - target_radius = physics.named.model.site_size['target', 0] - fetch_reward = rewards.tolerance( - physics.ball_to_target_distance(), - bounds=(0, target_radius), - sigmoid='linear', - margin=arena_radius, value_at_margin=0) - - reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward) - - return _upright_reward(physics) * reach_then_fetch \ No newline at end of file + """A quadruped task solved by bringing a ball to the origin.""" + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. 
+ Args: + physics: An instance of `Physics`. + """ + # Initial configuration, random azimuth and horizontal position. + azimuth = self.random.uniform(0, 2*np.pi) + orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2))) + spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0] + x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,)) + _find_non_contacting_height(physics, orientation, x_pos, y_pos) + + # Initial ball state. + physics.named.data.qpos['ball_root'][:2] = self.random.uniform( + -spawn_radius, spawn_radius, size=(2,)) + physics.named.data.qpos['ball_root'][2] = 2 + physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation to the agent.""" + obs = _common_observations(physics) + obs['ball_state'] = physics.ball_state() + obs['target_position'] = physics.target_position() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + + # Reward for moving close to the ball. + arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2) + workspace_radius = physics.named.model.site_size['workspace', 0] + ball_radius = physics.named.model.geom_size['ball', 0] + reach_reward = rewards.tolerance( + physics.self_to_ball_distance(), + bounds=(0, workspace_radius+ball_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + # Reward for bringing the ball to the target. + target_radius = physics.named.model.site_size['target', 0] + fetch_reward = rewards.tolerance( + physics.ball_to_target_distance(), + bounds=(0, target_radius), + sigmoid='linear', + margin=arena_radius, value_at_margin=0) + + reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward) + + return _upright_reward(physics) * reach_then_fetch diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index 1efaf301..35ea407f 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -2,127 +2,139 @@ def adapt_context(xml_string, context, context_mask=[]): - """Adapts and returns the xml_string of the model with the given context.""" - mjcf = etree.fromstring(xml_string) - default = mjcf.find("./default/") - if default is None: - default = etree.Element("default") - mjcf.addnext(default) - - if "joint_daming" not in context_mask: - # adjust damping for all joints if damping is already an attribute - for joint_find in mjcf.findall(".//joint[@damping]"): - joint_damping = joint_find.get("damping") - joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) - - if "joint_stiffness" not in context_mask: - # adjust stiffness for all joints if stiffness is already an attribute - for joint_find in mjcf.findall(".//joint[@stiffness]"): - joint_stiffness = joint_find.get("stiffness") - joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) - - # set default joint damping if default/joint is not present - joint = mjcf.find("./default/joint") - if joint is None: - joint = etree.Element("joint") - default.addnext(joint) + """Adapts and returns the xml_string of the model with the given context.""" + mjcf = etree.fromstring(xml_string) + default = mjcf.find("./default/") + if default is None: + default = etree.Element("default") + mjcf.addnext(default) + if "joint_daming" not in context_mask: - def_joint_damping = 0.1 - default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) - joint.set("damping", 
default_joint_damping) + # adjust damping for all joints if damping is already an attribute + for joint_find in mjcf.findall(".//joint[@damping]"): + joint_damping = joint_find.get("damping") + joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) + if "joint_stiffness" not in context_mask: - default_joint_stiffness = str(context["joint_stiffness"]) - joint.set("stiffness", default_joint_stiffness) - - # adjust friction for all geom elements with friction attribute - for geom_find in mjcf.findall(".//geom[@friction]"): - friction = geom_find.get("friction").split(" ") - frict_str = "" - for i, (f, d) in enumerate(zip(friction, [context["friction_tangential"], context["friction_torsional"], context["friction_rolling"]])): - if (i == 0 and "friction_tangential" not in context_mask) or (i == 1 and "friction_torsional" not in context_mask) or (i == 2 and "friction_rolling" not in context_mask): - frict_str += str(float(f) * d) + " " - else: - frict_str += str(f) + " " - geom_find.set("friction", frict_str[:-1]) - - if "geom_density" not in context_mask: - # adjust density for all geom elements with density attribute - for geom_find in mjcf.findall(".//geom[@density]"): - geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) - - # create default geom if it does not exist - geom = mjcf.find("./default/geom") - if geom is None: - geom = etree.Element("geom") - default.addnext(geom) - - # set default friction - if geom.get("friction") is None: - default_friction_tangential = 1. - default_friction_torsional = 0.005 - default_friction_rolling = 0.0001 - geom.set("friction", " ".join([ - (str(default_friction_tangential * context["friction_tangential"]) if "friction_tangential" not in context_mask else str(default_friction_tangential)), - (str(default_friction_torsional * context["friction_torsional"]) if "friction_torsional" not in context_mask else str(default_friction_torsional)), - (str(default_friction_rolling * context["friction_rolling"]) if "friction_rolling" not in context_mask else str(default_friction_rolling)), - ])) - - if "geom_density" not in context_mask: - # set default density - geom_density = geom.get("density") - if geom_density is None: - geom_density = 1000 - geom.set("density", str(float(geom_density) * context["geom_density"])) - - if "actuator_strength" not in context_mask: - # scale all actuators with the actuator strength factor - actuators = mjcf.findall("./actuator/") - for actuator in actuators: - gear = actuator.get("gear") - if gear is None: - gear = 1 - actuator.set("gear", str(float(gear) * context["actuator_strength"])) - - # find option settings and override them if they exist, otherwise create new option - option = mjcf.find(".//option") - if option is None: - option = etree.Element("option") - mjcf.append(option) - - if "gravity" not in context_mask: - gravity = option.get("gravity") - if gravity is not None: - g = gravity.split(" ") - gravity = " ".join([g[0], g[1], str(context["gravity"])]) - else: - gravity = " ".join(["0", "0", str(context["gravity"])]) - option.set("gravity", gravity) - - if "wind" not in context_mask: - wind = option.get("wind") - if wind is not None: - w = wind.split(" ") - wind = " ".join([ - (str(context["wind_x"]) if "wind_x" not in context_mask else w[0]), - (str(context["wind_y"]) if "wind_y" not in context_mask else w[1]), - (str(context["wind_z"]) if "wind_z" not in context_mask else w[2]), - ]) - else: - wind = " ".join([ - (str(context["wind_x"]) if "wind_x" not in 
context_mask else "0"), - (str(context["wind_y"]) if "wind_y" not in context_mask else "0"), - (str(context["wind_z"]) if "wind_z" not in context_mask else "0"), - ]) - option.set("wind", wind) - - if "timestep" not in context_mask: - option.set("timestep", str(context["timestep"])) - - if "density" not in context_mask: - option.set("density", str(context["density"])) - - if "viscosity" not in context_mask: - option.set("viscosity", str(context["viscosity"])) - - xml_string = etree.tostring(mjcf, pretty_print=True) - return xml_string + # adjust stiffness for all joints if stiffness is already an attribute + for joint_find in mjcf.findall(".//joint[@stiffness]"): + joint_stiffness = joint_find.get("stiffness") + joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) + + # set default joint damping if default/joint is not present + joint = mjcf.find("./default/joint") + if joint is None: + joint = etree.Element("joint") + default.addnext(joint) + if "joint_daming" not in context_mask: + def_joint_damping = 0.1 + default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) + joint.set("damping", default_joint_damping) + if "joint_stiffness" not in context_mask: + default_joint_stiffness = str(context["joint_stiffness"]) + joint.set("stiffness", default_joint_stiffness) + + # adjust friction for all geom elements with friction attribute + for geom_find in mjcf.findall(".//geom[@friction]"): + friction = geom_find.get("friction").split(" ") + frict_str = "" + for i, (f, d) in enumerate( + zip( + friction, + [context["friction_tangential"], + context["friction_torsional"], + context["friction_rolling"]])): + if ( + (i == 0 and "friction_tangential" not in context_mask) + or (i == 1 and "friction_torsional" not in context_mask) + or (i == 2 and "friction_rolling" not in context_mask) + ): + frict_str += str(float(f) * d) + " " + else: + frict_str += str(f) + " " + geom_find.set("friction", frict_str[:-1]) + + if "geom_density" not in context_mask: + # adjust density for all geom elements with density attribute + for geom_find in mjcf.findall(".//geom[@density]"): + geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) + + # create default geom if it does not exist + geom = mjcf.find("./default/geom") + if geom is None: + geom = etree.Element("geom") + default.addnext(geom) + + # set default friction + if geom.get("friction") is None: + default_friction_tangential = 1. 
+ default_friction_torsional = 0.005 + default_friction_rolling = 0.0001 + geom.set("friction", " ".join([ + (str(default_friction_tangential * context["friction_tangential"]) + if "friction_tangential" not in context_mask else str(default_friction_tangential)), + (str(default_friction_torsional * context["friction_torsional"]) + if "friction_torsional" not in context_mask else str(default_friction_torsional)), + (str(default_friction_rolling * context["friction_rolling"]) + if "friction_rolling" not in context_mask else str(default_friction_rolling)), + ])) + + if "geom_density" not in context_mask: + # set default density + geom_density = geom.get("density") + if geom_density is None: + geom_density = 1000 + geom.set("density", str(float(geom_density) * context["geom_density"])) + + if "actuator_strength" not in context_mask: + # scale all actuators with the actuator strength factor + actuators = mjcf.findall("./actuator/") + for actuator in actuators: + gear = actuator.get("gear") + if gear is None: + gear = 1 + actuator.set("gear", str(float(gear) * context["actuator_strength"])) + + # find option settings and override them if they exist, otherwise create new option + option = mjcf.find(".//option") + if option is None: + option = etree.Element("option") + mjcf.append(option) + + if "gravity" not in context_mask: + gravity = option.get("gravity") + if gravity is not None: + g = gravity.split(" ") + gravity = " ".join([g[0], g[1], str(context["gravity"])]) + else: + gravity = " ".join(["0", "0", str(context["gravity"])]) + option.set("gravity", gravity) + + if "wind" not in context_mask: + wind = option.get("wind") + if wind is not None: + w = wind.split(" ") + wind = " ".join([ + (str(context["wind_x"]) if "wind_x" not in context_mask else w[0]), + (str(context["wind_y"]) if "wind_y" not in context_mask else w[1]), + (str(context["wind_z"]) if "wind_z" not in context_mask else w[2]), + ]) + else: + wind = " ".join([ + (str(context["wind_x"]) if "wind_x" not in context_mask else "0"), + (str(context["wind_y"]) if "wind_y" not in context_mask else "0"), + (str(context["wind_z"]) if "wind_z" not in context_mask else "0"), + ]) + option.set("wind", wind) + + if "timestep" not in context_mask: + option.set("timestep", str(context["timestep"])) + + if "density" not in context_mask: + option.set("density", str(context["density"])) + + if "viscosity" not in context_mask: + option.set("viscosity", str(context["viscosity"])) + + xml_string = etree.tostring(mjcf, pretty_print=True) + return xml_string diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index ba162f89..cf2679bf 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -16,7 +16,6 @@ """Planar Walker Domain.""" import collections -from multiprocessing.context import _force_start_method from dm_control import mujoco from dm_control.rl import control @@ -43,119 +42,119 @@ def get_model_and_assets(): - """Returns a tuple containing the model XML string and a dict of assets.""" - return common.read_model('walker.xml'), common.ASSETS + """Returns a tuple containing the model XML string and a dict of assets.""" + return common.read_model('walker.xml'), common.ASSETS @SUITE.add('benchmarking') def stand_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Stand task with the adapted context.""" - xml_string, assets = get_model_and_assets() - if context != {}: - xml_string = 
adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, assets) - task = PlanarWalker(move_speed=0, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Stand task with the adapted context.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) @SUITE.add('benchmarking') def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Walk task with the adapted context.""" - xml_string, assets = get_model_and_assets() - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, assets) - task = PlanarWalker(move_speed=_WALK_SPEED, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Walk task with the adapted context.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=_WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) @SUITE.add('benchmarking') def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): - """Returns the Run task with the adapted context.""" - xml_string, assets = get_model_and_assets() - if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) - physics = Physics.from_xml_string(xml_string, assets) - task = PlanarWalker(move_speed=_RUN_SPEED, random=random) - environment_kwargs = environment_kwargs or {} - return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + """Returns the Run task with the adapted context.""" + xml_string, assets = get_model_and_assets() + if context != {}: + xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + physics = Physics.from_xml_string(xml_string, assets) + task = PlanarWalker(move_speed=_RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) class Physics(mujoco.Physics): - """Physics simulation with additional features for the Walker domain.""" + """Physics simulation with additional features for the Walker domain.""" - def torso_upright(self): - """Returns projection from z-axes of torso to the z-axes of world.""" - return self.named.data.xmat['torso', 'zz'] + def 
torso_upright(self): + """Returns projection from z-axes of torso to the z-axes of world.""" + return self.named.data.xmat['torso', 'zz'] - def torso_height(self): - """Returns the height of the torso.""" - return self.named.data.xpos['torso', 'z'] + def torso_height(self): + """Returns the height of the torso.""" + return self.named.data.xpos['torso', 'z'] - def horizontal_velocity(self): - """Returns the horizontal velocity of the center-of-mass.""" - return self.named.data.sensordata['torso_subtreelinvel'][0] + def horizontal_velocity(self): + """Returns the horizontal velocity of the center-of-mass.""" + return self.named.data.sensordata['torso_subtreelinvel'][0] - def orientations(self): - """Returns planar orientations of all bodies.""" - return self.named.data.xmat[1:, ['xx', 'xz']].ravel() + def orientations(self): + """Returns planar orientations of all bodies.""" + return self.named.data.xmat[1:, ['xx', 'xz']].ravel() class PlanarWalker(base.Task): - """A planar walker task.""" - - def __init__(self, move_speed, random=None): - """Initializes an instance of `PlanarWalker`. - Args: - move_speed: A float. If this value is zero, reward is given simply for - standing up. Otherwise this specifies a target horizontal velocity for - the walking task. - random: Optional, either a `numpy.random.RandomState` instance, an - integer seed for creating a new `RandomState`, or None to select a seed - automatically (default). - """ - self._move_speed = move_speed - super().__init__(random=random) - - def initialize_episode(self, physics): - """Sets the state of the environment at the start of each episode. - In 'standing' mode, use initial orientation and small velocities. - In 'random' mode, randomize joint angles and let fall to the floor. - Args: - physics: An instance of `Physics`. - """ - randomizers.randomize_limited_and_rotational_joints(physics, self.random) - super().initialize_episode(physics) - - def get_observation(self, physics): - """Returns an observation of body orientations, height and velocites.""" - obs = collections.OrderedDict() - obs['orientations'] = physics.orientations() - obs['height'] = physics.torso_height() - obs['velocity'] = physics.velocity() - return obs - - def get_reward(self, physics): - """Returns a reward to the agent.""" - standing = rewards.tolerance(physics.torso_height(), - bounds=(_STAND_HEIGHT, float('inf')), - margin=_STAND_HEIGHT/2) - upright = (1 + physics.torso_upright()) / 2 - stand_reward = (3*standing + upright) / 4 - if self._move_speed == 0: - return stand_reward - else: - move_reward = rewards.tolerance(physics.horizontal_velocity(), - bounds=(self._move_speed, float('inf')), - margin=self._move_speed/2, - value_at_margin=0.5, - sigmoid='linear') - return stand_reward * (5*move_reward + 1) / 6 + """A planar walker task.""" + + def __init__(self, move_speed, random=None): + """Initializes an instance of `PlanarWalker`. + Args: + move_speed: A float. If this value is zero, reward is given simply for + standing up. Otherwise this specifies a target horizontal velocity for + the walking task. + random: Optional, either a `numpy.random.RandomState` instance, an + integer seed for creating a new `RandomState`, or None to select a seed + automatically (default). + """ + self._move_speed = move_speed + super().__init__(random=random) + + def initialize_episode(self, physics): + """Sets the state of the environment at the start of each episode. + In 'standing' mode, use initial orientation and small velocities. 
+ In 'random' mode, randomize joint angles and let fall to the floor. + Args: + physics: An instance of `Physics`. + """ + randomizers.randomize_limited_and_rotational_joints(physics, self.random) + super().initialize_episode(physics) + + def get_observation(self, physics): + """Returns an observation of body orientations, height and velocites.""" + obs = collections.OrderedDict() + obs['orientations'] = physics.orientations() + obs['height'] = physics.torso_height() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + """Returns a reward to the agent.""" + standing = rewards.tolerance(physics.torso_height(), + bounds=(_STAND_HEIGHT, float('inf')), + margin=_STAND_HEIGHT/2) + upright = (1 + physics.torso_upright()) / 2 + stand_reward = (3*standing + upright) / 4 + if self._move_speed == 0: + return stand_reward + else: + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed/2, + value_at_margin=0.5, + sigmoid='linear') + return stand_reward * (5*move_reward + 1) / 6 diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index c19604ed..3d7cd62d 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -4,9 +4,10 @@ from carl.envs.dmc.dmc_tasks import walker, quadruped, fish -_DOMAINS = {name: module for name, module in locals().items() +_DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} + def load_dmc_env(domain_name, task_name, context={}, context_mask=[], task_kwargs=None, environment_kwargs=None, visualize_reward=False): diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index 84d4a7be..c377be71 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -9,6 +9,7 @@ ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") + class MujocoToGymWrapper(gym.Env): def __init__(self, env: Environment): # TODO set seeds @@ -44,7 +45,8 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: observation (object): agent's observation of the current environment reward (float) : amount of reward returned after previous action done (bool): whether the episode has ended, in which case further step() calls will return undefined results - info (dict): contains auxiliary diagnostic information (helpful for debugging, logging, and sometimes learning) + info (dict): contains auxiliary diagnostic information + (helpful for debugging, logging, and sometimes learning) """ timestep = self.env.step(action=action) step_type: StepType = timestep.step_type From d236bd0e562978075bbd42572a96f92d634cca5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Wed, 22 Jun 2022 14:58:29 +0200 Subject: [PATCH 26/37] dmc env less duplicate code --- carl/envs/dmc/__init__.py | 2 +- carl/envs/dmc/carl_dm_fish.py | 24 ++---------------------- carl/envs/dmc/carl_dm_quadruped.py | 24 ++---------------------- carl/envs/dmc/carl_dm_walker.py | 24 ++---------------------- carl/envs/dmc/carl_dmcontrol.py | 24 +++++++++++++++++++++--- carl/envs/dmc/dmc_tasks/utils.py | 4 ++-- carl/envs/dmc/loader.py | 2 +- 7 files changed, 31 insertions(+), 73 deletions(-) diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index 65b477c9..e1cfa631 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -1,5 +1,5 @@ +# flake8: noqa: F401 # Contexts and bounds by name - from carl.envs.dmc.carl_dm_walker import ( CARLDmcWalkerEnv, DEFAULT_CONTEXT as 
CARLDmcWalkerEnv_defaults, diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index d557295f..55e35d20 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -4,8 +4,6 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector -from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv @@ -71,23 +69,9 @@ def __init__( context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, context_selector_kwargs: Optional[Dict] = None, ): - if not contexts: - contexts = {0: DEFAULT_CONTEXT} - self.domain = domain - self.task = task - if dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env( - domain_name=domain, - task_name=task, - context={}, - context_mask=[], - environment_kwargs={"flat_observation": True} - ) - env = MujocoToGymWrapper(env) super().__init__( - env=env, + domain=domain, + task=task, contexts=contexts, context_mask=context_mask, hide_context=hide_context, @@ -102,7 +86,3 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) - # TODO check gaussian noise on context features - self.whitelist_gaussian_noise = list( - DEFAULT_CONTEXT.keys() - ) # allow to augment all values diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 95dd149b..e92f6e57 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -4,8 +4,6 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector -from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv @@ -69,23 +67,9 @@ def __init__( context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, context_selector_kwargs: Optional[Dict] = None, ): - if not contexts: - contexts = {0: DEFAULT_CONTEXT} - self.domain = domain - self.task = task - if dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env( - domain_name=domain, - task_name=task, - context={}, - context_mask=[], - environment_kwargs={"flat_observation": True} - ) - env = MujocoToGymWrapper(env) super().__init__( - env=env, + domain=domain, + task=task, contexts=contexts, context_mask=context_mask, hide_context=hide_context, @@ -100,7 +84,3 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) - # TODO check gaussian noise on context features - self.whitelist_gaussian_noise = list( - DEFAULT_CONTEXT.keys() - ) # allow to augment all values diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 0083866c..bd656a2e 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -4,8 +4,6 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector -from carl.envs.dmc.wrappers import MujocoToGymWrapper -from carl.envs.dmc.loader import load_dmc_env from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv @@ -69,23 +67,9 @@ def __init__( context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, context_selector_kwargs: Optional[Dict] = None, ): - if not contexts: - contexts = {0: DEFAULT_CONTEXT} - self.domain = domain - self.task = task - if dict_observation_space: - raise NotImplementedError - else: - env = 
load_dmc_env( - domain_name=domain, - task_name=task, - context={}, - context_mask=[], - environment_kwargs={"flat_observation": True} - ) - env = MujocoToGymWrapper(env) super().__init__( - env=env, + domain=domain, + task=task, contexts=contexts, context_mask=context_mask, hide_context=hide_context, @@ -100,7 +84,3 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) - # TODO check gaussian noise on context features - self.whitelist_gaussian_noise = list( - DEFAULT_CONTEXT.keys() - ) # allow to augment all values diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index c3c7f056..0f43961d 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -1,7 +1,5 @@ from typing import Any, Dict, List, Union, Optional -import gym - from carl.envs.carl_env import CARLEnv from carl.envs.dmc.wrappers import MujocoToGymWrapper from carl.envs.dmc.loader import load_dmc_env @@ -12,7 +10,8 @@ class CARLDmcEnv(CARLEnv): def __init__( self, - env: gym.Env, + domain: str, + task: str, contexts: Dict[Any, Dict[Any, Any]], context_mask: Optional[List[str]], hide_context: bool, @@ -29,6 +28,21 @@ def __init__( ): # TODO can we have more than 1 env? # env = MujocoToGymWrapper(env) + if not contexts: + contexts = {0: default_context} + self.domain = domain + self.task = task + if dict_observation_space: + raise NotImplementedError + else: + env = load_dmc_env( + domain_name=self.domain, + task_name=self.task, + context={}, + context_mask=[], + environment_kwargs={"flat_observation": True} + ) + env = MujocoToGymWrapper(env) self.context_mask = context_mask super().__init__( env=env, @@ -45,6 +59,10 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, ) + # TODO check gaussian noise on context features + self.whitelist_gaussian_noise = list( + default_context.keys() + ) # allow to augment all values def _update_context(self) -> None: if self.dict_observation_space: diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index 35ea407f..54419259 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -9,7 +9,7 @@ def adapt_context(xml_string, context, context_mask=[]): default = etree.Element("default") mjcf.addnext(default) - if "joint_daming" not in context_mask: + if "joint_damping" not in context_mask: # adjust damping for all joints if damping is already an attribute for joint_find in mjcf.findall(".//joint[@damping]"): joint_damping = joint_find.get("damping") @@ -26,7 +26,7 @@ def adapt_context(xml_string, context, context_mask=[]): if joint is None: joint = etree.Element("joint") default.addnext(joint) - if "joint_daming" not in context_mask: + if "joint_damping" not in context_mask: def_joint_damping = 0.1 default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) joint.set("damping", default_joint_damping) diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 3d7cd62d..4935425d 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -2,7 +2,7 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import walker, quadruped, fish +from carl.envs.dmc.dmc_tasks import walker, quadruped, fish # noqa: F401 _DOMAINS = {name: module for name, module in locals().items() if inspect.ismodule(module) and hasattr(module, 'SUITE')} From f1c2ddd74f6f27a57efe54942e1f9810bff03d4b Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Fri, 24 Jun 2022 
10:43:50 +0200 Subject: [PATCH 27/37] Rename/move file --- test_dm_control.py => carl/envs/dmc/try_dm_control.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test_dm_control.py => carl/envs/dmc/try_dm_control.py (100%) diff --git a/test_dm_control.py b/carl/envs/dmc/try_dm_control.py similarity index 100% rename from test_dm_control.py rename to carl/envs/dmc/try_dm_control.py From 433fa0eefb212303b076d2cd28525069287f1b65 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Fri, 24 Jun 2022 10:47:50 +0200 Subject: [PATCH 28/37] Fix tests --- test/test_all_envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_all_envs.py b/test/test_all_envs.py index 9169e825..141b06b4 100644 --- a/test/test_all_envs.py +++ b/test/test_all_envs.py @@ -9,7 +9,7 @@ class TestInitEnvs(unittest.TestCase): def test_init_all_envs(self): global_vars = vars(carl.envs) mustinclude = "CARL" - forbidden = ["defaults", "bounds"] + forbidden = ["defaults", "bounds", "mask"] for varname, var in global_vars.items(): if mustinclude in varname and not np.any([f in varname for f in forbidden]): try: From 8b951a56e6cd57b9856540e02a9b39f8dc432b52 Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 27 Jun 2022 12:25:25 +0200 Subject: [PATCH 29/37] Change default tasks --- carl/envs/dmc/carl_dm_fish.py | 2 +- carl/envs/dmc/carl_dm_walker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 55e35d20..8deedb8a 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -54,7 +54,7 @@ class CARLDmcFishEnv(CARLDmcEnv): def __init__( self, domain: str = "fish", - task: str = "upright_context", + task: str = "swim_context", contexts: Dict[Any, Dict[Any, Any]] = {}, context_mask: Optional[List[str]] = [], hide_context: bool = False, diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index bd656a2e..75fdb0c3 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -52,7 +52,7 @@ class CARLDmcWalkerEnv(CARLDmcEnv): def __init__( self, domain: str = "walker", - task: str = "stand_context", + task: str = "walk_context", contexts: Dict[Any, Dict[Any, Any]] = {}, context_mask: Optional[List[str]] = [], hide_context: bool = False, From 5d9f99487361d4d0f397306f52349c2f379f494f Mon Sep 17 00:00:00 2001 From: Carolin Benjamins Date: Mon, 27 Jun 2022 14:00:46 +0200 Subject: [PATCH 30/37] Adjust episode lengths to 1000 According to dm control settings. 
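
The 1000-step limit follows directly from the dm_control task settings touched in this series: each task defines an episode time limit in seconds and a control timestep, so the number of control steps per episode is time_limit / control_timestep. A minimal sketch of that arithmetic, using the constants visible in these patches for fish (40 s at 0.04 s per step) and quadruped (20 s at 0.02 s per step); the helper name below is illustrative only and is not part of the CARL or dm_control code:

    # Illustrative sketch: episode step budget implied by dm_control settings.
    def implied_step_limit(time_limit_s: float, control_timestep_s: float) -> int:
        """Number of control steps in one episode."""
        return int(round(time_limit_s / control_timestep_s))

    assert implied_step_limit(20, 0.02) == 1000  # quadruped: _DEFAULT_TIME_LIMIT / _CONTROL_TIMESTEP
    assert implied_step_limit(40, 0.04) == 1000  # fish: _DEFAULT_TIME_LIMIT / _CONTROL_TIMESTEP
    # Hence STEP_LIMIT = 1000 in the task modules and
    # max_episode_length = STEP_LIMIT in the CARL env wrappers below.
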
--- carl/envs/dmc/carl_dm_fish.py | 3 ++- carl/envs/dmc/carl_dm_quadruped.py | 3 ++- carl/envs/dmc/carl_dm_walker.py | 3 ++- carl/envs/dmc/dmc_tasks/fish.py | 1 + carl/envs/dmc/dmc_tasks/quadruped.py | 1 + carl/envs/dmc/dmc_tasks/walker.py | 2 ++ 6 files changed, 10 insertions(+), 3 deletions(-) diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index 8deedb8a..c55d5371 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -5,6 +5,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv +from carl.envs.dmc.dmc_tasks.fish import STEP_LIMIT DEFAULT_CONTEXT = { @@ -63,7 +64,7 @@ def __init__( logger: Optional[TrialLogger] = None, scale_context_features: str = "no", default_context: Optional[Dict] = DEFAULT_CONTEXT, - max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + max_episode_length: int = STEP_LIMIT, state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index e92f6e57..bc56da8c 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -5,6 +5,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv +from carl.envs.dmc.dmc_tasks.quadruped import STEP_LIMIT DEFAULT_CONTEXT = { @@ -61,7 +62,7 @@ def __init__( logger: Optional[TrialLogger] = None, scale_context_features: str = "no", default_context: Optional[Dict] = DEFAULT_CONTEXT, - max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + max_episode_length: int = STEP_LIMIT, state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 75fdb0c3..ee891419 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -5,6 +5,7 @@ from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv +from carl.envs.dmc.dmc_tasks.walker import STEP_LIMIT DEFAULT_CONTEXT = { @@ -61,7 +62,7 @@ def __init__( logger: Optional[TrialLogger] = None, scale_context_features: str = "no", default_context: Optional[Dict] = DEFAULT_CONTEXT, - max_episode_length: int = 500, # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py + max_episode_length: int = STEP_LIMIT, state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, diff --git a/carl/envs/dmc/dmc_tasks/fish.py b/carl/envs/dmc/dmc_tasks/fish.py index 869511f0..47dc6d25 100644 --- a/carl/envs/dmc/dmc_tasks/fish.py +++ b/carl/envs/dmc/dmc_tasks/fish.py @@ -29,6 +29,7 @@ _DEFAULT_TIME_LIMIT = 40 _CONTROL_TIMESTEP = .04 +STEP_LIMIT = 1000 _JOINTS = ['tail1', 'tail_twist', 'tail2', diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py index 6b865c07..ed1bf5ad 100644 --- a/carl/envs/dmc/dmc_tasks/quadruped.py +++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -36,6 +36,7 @@ 
_DEFAULT_TIME_LIMIT = 20 _CONTROL_TIMESTEP = .02 +STEP_LIMIT = 1000 # Horizontal speeds above which the move reward is 1. _RUN_SPEED = 5 diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index cf2679bf..3c32bda5 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -37,6 +37,8 @@ _WALK_SPEED = 1 _RUN_SPEED = 8 +STEP_LIMIT = 1000 + SUITE = containers.TaggedTasks() From 037b2e99676b80f8adcee4708186f5d82ba15765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 11:17:00 +0200 Subject: [PATCH 31/37] add docstring --- carl/envs/dmc/carl_dmcontrol.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index 0f43961d..b4e2c5ea 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -8,6 +8,25 @@ class CARLDmcEnv(CARLEnv): + """ + General class for the dm-control environments. + + Meta-class to change the context for the environments. + + Parameters + ---------- + domain : str + Dm-control domain that should be loaded. + task : str + Task within the specified domain. + + For descriptions of the other parameters see the parent class CARLEnv. + + Raises + ------ + NotImplementedError + Dict observation spaces are not implemented for dm-control yet. + """ def __init__( self, domain: str, From 2d23f17a679343d167a4a17dc18118d640b35079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 11:33:46 +0200 Subject: [PATCH 32/37] update readme --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b00476f5..26263f17 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ Benchmarks include: - [RNADesign](https://github.com/automl/learna/), an environment for RNA design given structure constraints with structures from different datasets to choose from +- [dm_control](https://github.com/deepmind/dm_control), environments based on the MuJoCo physics engine. The environments are extended with different context features. + ![Screenshot of each environment included in CARL.](./docs/source/figures/envs_overview.png) For more information, check out our [documentation](https://carl.readthedocs.io/en/latest/)! @@ -42,7 +44,7 @@ pip install . This will only install the basic classic control environments, which should run on most operating systems. For the full set of environments, use the install options: ```bash -pip install -e .[box2d, brax, rna, mario] +pip install -e .[box2d, brax, rna, mario, dm_control] ``` These may not be compatible with Windows systems. Box2D environment may need to be installed via conda on MacOS systems: @@ -95,6 +97,8 @@ Awiszus et al., AIIDE 2020](https://arxiv.org/pdf/2008.01531.pdf) [Learning to Design RNA, Runge et al., ICRL 2019](https://arxiv.org/pdf/1812.11951.pdf) +[dm_control: Software and Tasks for Continuous Control](https://arxiv.org/pdf/2006.12983.pdf) + ## License CARL falls under the Apache License 2.0 (see file 'LICENSE') as is permitted by all work that we use. 
This includes CARLMario, which is not based on the Nintendo Game, but on From f5c6557f0e16fa26c790a924c27420b5e40e462b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 11:53:17 +0200 Subject: [PATCH 33/37] flake8 formatting --- carl/envs/box2d/carl_bipedal_walker.py | 5 +- carl/envs/box2d/carl_lunarlander.py | 4 - carl/envs/carl_env.py | 2 +- .../carl_mountaincarcontinuous.py | 2 +- carl/envs/dmc/try_dm_control.py | 3 +- carl/envs/rna/carl_rna.py | 6 +- carl/envs/rna/carl_rna_definitions.py | 26 ++++-- carl/utils/types.py | 2 +- docs/conf.py | 86 +++++++++---------- docs/themes/smac/smac_theme.py | 2 +- test/local_only/test_rna_env.py | 2 - test/test_CARLEnv.py | 2 +- test/test_selector.py | 2 +- 13 files changed, 73 insertions(+), 71 deletions(-) diff --git a/carl/envs/box2d/carl_bipedal_walker.py b/carl/envs/box2d/carl_bipedal_walker.py index 3f0bdad5..965fb906 100644 --- a/carl/envs/box2d/carl_bipedal_walker.py +++ b/carl/envs/box2d/carl_bipedal_walker.py @@ -1,12 +1,9 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Union -import Box2D import numpy as np from Box2D.b2 import edgeShape, fixtureDef, polygonShape -from gym import spaces from gym.envs.box2d import bipedal_walker from gym.envs.box2d import bipedal_walker as bpw -from gym.utils import EzPickle from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger diff --git a/carl/envs/box2d/carl_lunarlander.py b/carl/envs/box2d/carl_lunarlander.py index 89ab9e8a..16bba63d 100644 --- a/carl/envs/box2d/carl_lunarlander.py +++ b/carl/envs/box2d/carl_lunarlander.py @@ -1,13 +1,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar -import Box2D -import numpy as np -from gym import spaces from gym import Wrapper from gym.envs.box2d import lunar_lander from gym.envs.box2d.lunar_lander import heuristic -from gym.utils import EzPickle, seeding from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index 89bd230b..7bfd2829 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ -190,7 +190,7 @@ def __init__( self.context_feature_scale_factors[ self.context_feature_scale_factors == 0 ] = 1 # otherwise value / scale_factor = nan - + self.vectorized = n_envs > 1 self.build_observation_space() self._update_context() diff --git a/carl/envs/classic_control/carl_mountaincarcontinuous.py b/carl/envs/classic_control/carl_mountaincarcontinuous.py index a88ccea6..aa2a70ae 100644 --- a/carl/envs/classic_control/carl_mountaincarcontinuous.py +++ b/carl/envs/classic_control/carl_mountaincarcontinuous.py @@ -101,7 +101,7 @@ def __init__( scale_context_features=scale_context_features, default_context=default_context, max_episode_length=max_episode_length, - state_context_features = state_context_features, + state_context_features=state_context_features, dict_observation_space=dict_observation_space, context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, diff --git a/carl/envs/dmc/try_dm_control.py b/carl/envs/dmc/try_dm_control.py index fc48dd62..62043887 100644 --- a/carl/envs/dmc/try_dm_control.py +++ b/carl/envs/dmc/try_dm_control.py @@ -1,4 +1,4 @@ -import imp +# flake8: noqa: F401 from carl.envs import CARLDmcWalkerEnv from carl.envs import CARLDmcQuadrupedEnv from carl.envs import CARLDmcFishEnv @@ -12,7 +12,6 @@ if __name__ == "__main__": # Load one task: - stronger_act = 
walker_default.copy() stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 contexts = {0: stronger_act} diff --git a/carl/envs/rna/carl_rna.py b/carl/envs/rna/carl_rna.py index 58abfeab..3bdcf9ac 100644 --- a/carl/envs/rna/carl_rna.py +++ b/carl/envs/rna/carl_rna.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Tuple, Union +from typing import Dict, Optional, Union import gym import numpy as np @@ -22,7 +22,7 @@ class RnaGymWrapper(object): def __init__(self, env): self.env = env - + def reset(self): state = self.env.reset() state = np.array(state).flatten() @@ -43,7 +43,7 @@ def __getattr__(self, name): class CARLRnaDesignEnv(CARLEnv): def __init__( self, - env = None, + env: gym.Env = None, data_location: str = "envs/rna/learna/data", contexts: Dict[str, Dict] = {}, hide_context: bool = False, diff --git a/carl/envs/rna/carl_rna_definitions.py b/carl/envs/rna/carl_rna_definitions.py index 9e464783..f5090aed 100644 --- a/carl/envs/rna/carl_rna_definitions.py +++ b/carl/envs/rna/carl_rna_definitions.py @@ -1,10 +1,10 @@ import numpy as np from gym import spaces -#FIXME: how much sense do these make? Eterna solvers are unique and I'm not sure how to get the solvers for taneda -#SOLVER_LIST_ETERNA = [22230] -#SOLVER_LIST_RFAM_TANEDA = [None] -#SOLVER_LIST_RFAM_LEARN = [None] +# FIXME: how much sense do these make? Eterna solvers are unique and I'm not sure how to get the solvers for taneda +# SOLVER_LIST_ETERNA = [22230] +# SOLVER_LIST_RFAM_TANEDA = [None] +# SOLVER_LIST_RFAM_LEARN = [None] ID_LIST_ETERNA = np.arange(1, 101) ID_LIST_RFAM_TANEDA = np.arange(1, 30) @@ -17,15 +17,27 @@ "dataset": "rfam_taneda", "target_structure_ids": None, # if solvers is set to 'None', all solvers are eligible -# "solvers": None, + # "solvers": None, } CONTEXT_BOUNDS = { "mutation_threshold": (0.1, np.inf, float), "reward_exponent": (0.1, np.inf, float), "state_radius": (1, np.inf, float), "dataset": (None, None, "categorical", ["eterna", "rfam_taneda", "rfam_learn", None]), - "target_structure_ids": (None, None, "conditional", {"eterna": ID_LIST_ETERNA, "rfam_taneda": ID_LIST_RFAM_TANEDA, "rfan_learn": ID_LIST_RFAM_LEARN, None: [None]}, "dataset"), -# "solvers": {"eterna": SOLVER_LIST_ETERNA, "rfam_taneda": SOLVER_LIST_RFAM_TANEDA, "rfan_learn": SOLVER_LIST_RFAM_LEARN, None: [None]}, + "target_structure_ids": ( + None, None, "conditional", + { + "eterna": ID_LIST_ETERNA, + "rfam_taneda": ID_LIST_RFAM_TANEDA, + "rfan_learn": ID_LIST_RFAM_LEARN, None: [None] + }, + "dataset" + ), + # "solvers": { + # "eterna": SOLVER_LIST_ETERNA, + # "rfam_taneda": SOLVER_LIST_RFAM_TANEDA, + # "rfan_learn": SOLVER_LIST_RFAM_LEARN, None: [None] + # }, } ACTION_SPACE = spaces.Discrete(4) diff --git a/carl/utils/types.py b/carl/utils/types.py index 0b633ee3..fcb63e46 100644 --- a/carl/utils/types.py +++ b/carl/utils/types.py @@ -1,3 +1,3 @@ from typing import Dict, Any -Context = Dict[str, Any] \ No newline at end of file +Context = Dict[str, Any] diff --git a/docs/conf.py b/docs/conf.py index ad83c634..41b70572 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,13 +11,13 @@ # # All configuration values have a default; values that are commented out # serve to show the default. 
- +# flake8: noqa: E402 import datetime import sys import os sys.path.insert(0, '..') -#sys.path.insert(0, os.path.abspath('..')) +# sys.path.insert(0, os.path.abspath('..')) __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -34,7 +34,7 @@ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Important, otherwise `smac_theme` is not recognized sys.path.append(os.path.abspath("./themes/smac")) @@ -73,7 +73,7 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -101,9 +101,9 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -111,27 +111,27 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. # todo_include_todos = False @@ -156,14 +156,14 @@ # Add any paths that contain custom themes here, relative to this directory. html_theme_path = ['themes'] -#using_rtd_theme = True +# using_rtd_theme = True # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. @@ -172,7 +172,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -183,62 +183,62 @@ # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
-#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {'**': ['globaltoc.html']} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. # htmlhelp_basename = 'SMAC3doc' @@ -268,23 +268,23 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- @@ -297,7 +297,7 @@ # ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -312,19 +312,19 @@ # ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. 
-#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Show init as well as moduledoc -#autoclass_content = 'both' +# autoclass_content = 'both' # Sphinx-gallery configuration. sphinx_gallery_conf = { diff --git a/docs/themes/smac/smac_theme.py b/docs/themes/smac/smac_theme.py index 4c6a9681..63b44a95 100644 --- a/docs/themes/smac/smac_theme.py +++ b/docs/themes/smac/smac_theme.py @@ -1,7 +1,7 @@ """ Bootstrap-based sphinx theme from the PyData community """ - +# flake8: noqa: E402 # mypy: ignore-errors import os diff --git a/test/local_only/test_rna_env.py b/test/local_only/test_rna_env.py index f7757b2b..f0c9e968 100644 --- a/test/local_only/test_rna_env.py +++ b/test/local_only/test_rna_env.py @@ -1,7 +1,5 @@ import unittest -import numpy as np - from carl.envs.rna.carl_rna import CARLRnaDesignEnv, RnaGymWrapper diff --git a/test/test_CARLEnv.py b/test/test_CARLEnv.py index e21765d4..c0d44c58 100644 --- a/test/test_CARLEnv.py +++ b/test/test_CARLEnv.py @@ -339,7 +339,7 @@ def test_random_selectorclass_init(self): def test_unknown_selector_init(self): with self.assertRaises(ValueError): contexts = self.generate_contexts() - env = CARLPendulumEnv(contexts=contexts, context_selector="bork") + _ = CARLPendulumEnv(contexts=contexts, context_selector="bork") if __name__ == "__main__": diff --git a/test/test_selector.py b/test/test_selector.py index 3e4c8343..36789fff 100644 --- a/test/test_selector.py +++ b/test/test_selector.py @@ -75,4 +75,4 @@ def selector_function(inst: AbstractSelector): selector.select() self.assertEqual(selector.context_id, 0) - self.assertEqual(selector.contexts_keys[selector.context_id], "a") \ No newline at end of file + self.assertEqual(selector.contexts_keys[selector.context_id], "a") From 47254f51903edab0a487bbd4fceda107077fc941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 13:37:31 +0200 Subject: [PATCH 34/37] dmc dict observation space --- carl/envs/dmc/carl_dmcontrol.py | 39 ++++++++++++++------------------- carl/envs/dmc/try_dm_control.py | 23 ++++++++++--------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index b4e2c5ea..2b5a8913 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -46,22 +46,18 @@ def __init__( context_selector_kwargs: Optional[Dict], ): # TODO can we have more than 1 env? 
- # env = MujocoToGymWrapper(env) if not contexts: contexts = {0: default_context} self.domain = domain self.task = task - if dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env( - domain_name=self.domain, - task_name=self.task, - context={}, - context_mask=[], - environment_kwargs={"flat_observation": True} - ) - env = MujocoToGymWrapper(env) + env = load_dmc_env( + domain_name=self.domain, + task_name=self.task, + context={}, + context_mask=[], + environment_kwargs={"flat_observation": True} + ) + env = MujocoToGymWrapper(env) self.context_mask = context_mask super().__init__( env=env, @@ -84,14 +80,11 @@ def __init__( ) # allow to augment all values def _update_context(self) -> None: - if self.dict_observation_space: - raise NotImplementedError - else: - env = load_dmc_env( - domain_name=self.domain, - task_name=self.task, - context=self.context, - context_mask=self.context_mask, - environment_kwargs={"flat_observation": True} - ) - self.env = MujocoToGymWrapper(env) + env = load_dmc_env( + domain_name=self.domain, + task_name=self.task, + context=self.context, + context_mask=self.context_mask, + environment_kwargs={"flat_observation": True} + ) + self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/try_dm_control.py b/carl/envs/dmc/try_dm_control.py index 62043887..ed026942 100644 --- a/carl/envs/dmc/try_dm_control.py +++ b/carl/envs/dmc/try_dm_control.py @@ -15,7 +15,7 @@ stronger_act = walker_default.copy() stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 contexts = {0: stronger_act} - carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False) + carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False, dict_observation_space=True) # stronger_act = quadruped_default.copy() # stronger_act["actuator_strength"] = quadruped_default["actuator_strength"]*2 @@ -28,18 +28,17 @@ render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) s = carl_env.reset() render() - plt.savefig("asdf_dm.png") + # plt.savefig("dm_render.png") action = carl_env.action_space.sample() state, reward, done, info = carl_env.step(action=action) print("state", state, type(state)) - assert False - s = carl_env.reset() - done = False - i = 0 - while not done: - action = carl_env.action_space.sample() - state, reward, done, info = carl_env.step(action=action) - print(state, action, reward, done) - i += 1 - print(i) + # s = carl_env.reset() + # done = False + # i = 0 + # while not done: + # action = carl_env.action_space.sample() + # state, reward, done, info = carl_env.step(action=action) + # print(state, action, reward, done) + # i += 1 + # print(i) From ebe8ea3b64b48d1400e5d8ce0e51ddc6229a0373 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 13:38:48 +0200 Subject: [PATCH 35/37] import list --- carl/envs/rna/carl_rna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/carl/envs/rna/carl_rna.py b/carl/envs/rna/carl_rna.py index 3bdcf9ac..bac64222 100644 --- a/carl/envs/rna/carl_rna.py +++ b/carl/envs/rna/carl_rna.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional, Union +from typing import Dict, Optional, Union, List import gym import numpy as np From 484087bb55b24da125c40a58ff6d7bfba65d3095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 13:44:49 +0200 Subject: [PATCH 36/37] formatting --- carl/envs/classic_control/carl_acrobot.py | 14 
++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/carl/envs/classic_control/carl_acrobot.py b/carl/envs/classic_control/carl_acrobot.py index 11b5924d..8f90147c 100644 --- a/carl/envs/classic_control/carl_acrobot.py +++ b/carl/envs/classic_control/carl_acrobot.py @@ -77,8 +77,18 @@ def reset( options: Optional[dict] = None ): super().reset(seed=seed) - low = self.INITIAL_ANGLE_LOWER, self.INITIAL_ANGLE_LOWER, self.INITIAL_VELOCITY_LOWER, self.INITIAL_VELOCITY_LOWER - high = self.INITIAL_ANGLE_UPPER, self.INITIAL_ANGLE_UPPER, self.INITIAL_VELOCITY_UPPER, self.INITIAL_VELOCITY_UPPER + low = ( + self.INITIAL_ANGLE_LOWER, + self.INITIAL_ANGLE_LOWER, + self.INITIAL_VELOCITY_LOWER, + self.INITIAL_VELOCITY_LOWER + ) + high = ( + self.INITIAL_ANGLE_UPPER, + self.INITIAL_ANGLE_UPPER, + self.INITIAL_VELOCITY_UPPER, + self.INITIAL_VELOCITY_UPPER + ) self.state = self.np_random.uniform(low=low, high=high).astype( np.float32 ) From 9f0b2c449b2c985f0a40cc4e825bbb7489d1a336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20D=C3=B6hler?= Date: Fri, 1 Jul 2022 14:03:32 +0200 Subject: [PATCH 37/37] black and isort formatting --- carl/__init__.py | 4 +- carl/context/selection.py | 4 +- carl/envs/__init__.py | 1 + carl/envs/box2d/carl_bipedal_walker.py | 11 +- carl/envs/box2d/carl_lunarlander.py | 19 +- carl/envs/box2d/carl_vehicle_racing.py | 6 +- carl/envs/brax/carl_ant.py | 37 +-- carl/envs/brax/carl_fetch.py | 8 +- carl/envs/brax/carl_grasp.py | 8 +- carl/envs/brax/carl_halfcheetah.py | 8 +- carl/envs/brax/carl_humanoid.py | 13 +- carl/envs/brax/carl_ur5e.py | 36 ++- carl/envs/carl_env.py | 34 +- carl/envs/classic_control/carl_acrobot.py | 16 +- carl/envs/classic_control/carl_cartpole.py | 10 +- carl/envs/classic_control/carl_mountaincar.py | 22 +- .../carl_mountaincarcontinuous.py | 6 +- carl/envs/classic_control/carl_pendulum.py | 14 +- carl/envs/dmc/__init__.py | 22 +- carl/envs/dmc/carl_dm_fish.py | 29 +- carl/envs/dmc/carl_dm_quadruped.py | 35 ++- carl/envs/dmc/carl_dm_walker.py | 37 ++- carl/envs/dmc/carl_dmcontrol.py | 11 +- carl/envs/dmc/dmc_tasks/fish.py | 126 +++++--- carl/envs/dmc/dmc_tasks/quadruped.py | 294 +++++++++++------- carl/envs/dmc/dmc_tasks/utils.py | 85 +++-- carl/envs/dmc/dmc_tasks/walker.py | 124 +++++--- carl/envs/dmc/loader.py | 31 +- carl/envs/dmc/try_dm_control.py | 27 +- carl/envs/dmc/wrappers.py | 21 +- carl/envs/mario/carl_mario.py | 6 +- carl/envs/rna/carl_rna.py | 11 +- carl/envs/rna/carl_rna_definitions.py | 16 +- carl/utils/types.py | 2 +- test/test_CARLEnv.py | 16 +- test/test_selector.py | 16 +- 36 files changed, 714 insertions(+), 452 deletions(-) diff --git a/carl/__init__.py b/carl/__init__.py index 6b0a9e33..2102bbd8 100644 --- a/carl/__init__.py +++ b/carl/__init__.py @@ -1,6 +1,4 @@ __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "Apache-2.0 License" __version__ = "0.1" -__author__ = ( - "Carolin Benjamins, Theresa Eimer, Frederik Schubert, André Biedenkapp, Aditya Mohan" -) +__author__ = "Carolin Benjamins, Theresa Eimer, Frederik Schubert, André Biedenkapp, Aditya Mohan" diff --git a/carl/context/selection.py b/carl/context/selection.py index c70399be..0c3eda3e 100644 --- a/carl/context/selection.py +++ b/carl/context/selection.py @@ -1,7 +1,9 @@ from abc import abstractmethod +from typing import Any, Callable, Dict, List, Optional, Tuple + import numpy as np + from carl.utils.types import Context -from typing import Dict, Any, Optional, Tuple, List, Callable class AbstractSelector(object): diff --git 
a/carl/envs/__init__.py b/carl/envs/__init__.py index 6131d0a0..4e6b3611 100644 --- a/carl/envs/__init__.py +++ b/carl/envs/__init__.py @@ -20,6 +20,7 @@ found = brax_spec is not None if found: from carl.envs.brax import * + pass else: warnings.warn( diff --git a/carl/envs/box2d/carl_bipedal_walker.py b/carl/envs/box2d/carl_bipedal_walker.py index bc142eda..bad4a401 100644 --- a/carl/envs/box2d/carl_bipedal_walker.py +++ b/carl/envs/box2d/carl_bipedal_walker.py @@ -5,9 +5,9 @@ from gym.envs.box2d import bipedal_walker from gym.envs.box2d import bipedal_walker as bpw +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "FPS": 50, @@ -88,7 +88,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ @@ -119,7 +121,6 @@ def __init__( context_selector=context_selector, context_selector_kwargs=context_selector_kwargs, context_mask=context_mask, - ) self.whitelist_gaussian_noise = list( DEFAULT_CONTEXT.keys() @@ -194,7 +195,9 @@ def _update_context(self): self.env.world.gravity = gravity -def demo_heuristic(env: Union[CARLBipedalWalkerEnv, bipedal_walker.BipedalWalker]) -> None: +def demo_heuristic( + env: Union[CARLBipedalWalkerEnv, bipedal_walker.BipedalWalker] +) -> None: env.reset() steps = 0 total_reward = 0 diff --git a/carl/envs/box2d/carl_lunarlander.py b/carl/envs/box2d/carl_lunarlander.py index 1843ae03..5cee0ad4 100644 --- a/carl/envs/box2d/carl_lunarlander.py +++ b/carl/envs/box2d/carl_lunarlander.py @@ -1,13 +1,12 @@ -from typing import Any, Dict, List, Optional, Tuple, Union, TypeVar +from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union from gym import Wrapper - from gym.envs.box2d import lunar_lander from gym.envs.box2d.lunar_lander import heuristic +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector ObsType = TypeVar("ObsType") ActType = TypeVar("ActType") @@ -74,9 +73,9 @@ class LunarLanderEnv(Wrapper): def __init__( - self, - env: Optional[lunar_lander.LunarLander] = None, - high_gameover_penalty: bool = False, + self, + env: Optional[lunar_lander.LunarLander] = None, + high_gameover_penalty: bool = False, ): if env is None: env = lunar_lander.LunarLander() @@ -113,7 +112,9 @@ def __init__( max_episode_length: int = 1000, high_gameover_penalty: bool = False, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ @@ -178,7 +179,9 @@ def _update_context(self) -> None: def demo_heuristic_lander( - env: Union[CARLLunarLanderEnv, lunar_lander.LunarLander, lunar_lander.LunarLanderContinuous], + env: Union[ + CARLLunarLanderEnv, lunar_lander.LunarLander, lunar_lander.LunarLanderContinuous + ], seed: Optional[int] = None, render: bool = False, ) -> float: diff --git a/carl/envs/box2d/carl_vehicle_racing.py 
b/carl/envs/box2d/carl_vehicle_racing.py index f5187477..6908c728 100644 --- a/carl/envs/box2d/carl_vehicle_racing.py +++ b/carl/envs/box2d/carl_vehicle_racing.py @@ -6,6 +6,7 @@ from gym.envs.box2d.car_dynamics import Car from pyglet import gl +from carl.context.selection import AbstractSelector from carl.envs.box2d.parking_garage.bus import AWDBus # as Car from carl.envs.box2d.parking_garage.bus import AWDBusLargeTrailer # as Car from carl.envs.box2d.parking_garage.bus import AWDBusSmallTrailer # as Car @@ -37,7 +38,6 @@ from carl.envs.box2d.parking_garage.trike import TukTukSmallTrailer # as Car from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector PARKING_GARAGE_DICT = { # Racing car @@ -196,7 +196,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ diff --git a/carl/envs/brax/carl_ant.py b/carl/envs/brax/carl_ant.py index b0f27b34..cdeae740 100644 --- a/carl/envs/brax/carl_ant.py +++ b/carl/envs/brax/carl_ant.py @@ -6,14 +6,14 @@ import brax import numpy as np from brax.envs.ant import _SYSTEM_CONFIG, Ant -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "joint_stiffness": 5000, @@ -38,22 +38,23 @@ class CARLAnt(CARLEnv): def __init__( - self, - env: Ant = Ant(), - n_envs: int = 1, - contexts: Dict[str, Dict] = {}, - hide_context=False, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: str = "no", - default_context: Optional[Dict] = DEFAULT_CONTEXT, - state_context_features: Optional[List[str]] = None, - context_mask: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, - context_selector_kwargs: Optional[Dict] = None, - + self, + env: Ant = Ant(), + n_envs: int = 1, + contexts: Dict[str, Dict] = {}, + hide_context=False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, + context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: env = GymWrapper(env) diff --git a/carl/envs/brax/carl_fetch.py b/carl/envs/brax/carl_fetch.py index 8d800a91..c3253091 100644 --- a/carl/envs/brax/carl_fetch.py +++ b/carl/envs/brax/carl_fetch.py @@ -6,14 +6,14 @@ import brax import numpy as np from brax.envs.fetch import 
_SYSTEM_CONFIG, Fetch -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "joint_stiffness": 5000, @@ -55,7 +55,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: diff --git a/carl/envs/brax/carl_grasp.py b/carl/envs/brax/carl_grasp.py index 04f40f02..0ba14df7 100644 --- a/carl/envs/brax/carl_grasp.py +++ b/carl/envs/brax/carl_grasp.py @@ -6,14 +6,14 @@ import brax import numpy as np from brax.envs.grasp import _SYSTEM_CONFIG, Grasp -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "joint_stiffness": 5000, @@ -55,7 +55,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: diff --git a/carl/envs/brax/carl_halfcheetah.py b/carl/envs/brax/carl_halfcheetah.py index 98bfda9b..fb9f1d57 100644 --- a/carl/envs/brax/carl_halfcheetah.py +++ b/carl/envs/brax/carl_halfcheetah.py @@ -6,14 +6,14 @@ import brax import numpy as np from brax.envs.halfcheetah import _SYSTEM_CONFIG, Halfcheetah -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "joint_stiffness": 15000.0, @@ -49,7 +49,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: diff --git a/carl/envs/brax/carl_humanoid.py b/carl/envs/brax/carl_humanoid.py index 02e6fd04..dc66ccd4 100644 
--- a/carl/envs/brax/carl_humanoid.py +++ b/carl/envs/brax/carl_humanoid.py @@ -3,20 +3,19 @@ import copy import json -import numpy as np import brax +import numpy as np from brax import jumpy as jp -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper -from brax.envs.humanoid import Humanoid, _SYSTEM_CONFIG +from brax.envs.humanoid import _SYSTEM_CONFIG, Humanoid +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from brax.physics import bodies - from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "gravity": -9.8, @@ -50,7 +49,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: diff --git a/carl/envs/brax/carl_ur5e.py b/carl/envs/brax/carl_ur5e.py index 4653bb63..4838abfe 100644 --- a/carl/envs/brax/carl_ur5e.py +++ b/carl/envs/brax/carl_ur5e.py @@ -6,14 +6,14 @@ import brax import numpy as np from brax.envs.ur5e import _SYSTEM_CONFIG, Ur5e -from brax.envs.wrappers import GymWrapper, VectorWrapper, VectorGymWrapper +from brax.envs.wrappers import GymWrapper, VectorGymWrapper, VectorWrapper from google.protobuf import json_format, text_format from google.protobuf.json_format import MessageToDict from numpyencoder import NumpyEncoder +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "joint_stiffness": 40000, @@ -42,21 +42,23 @@ class CARLUr5e(CARLEnv): def __init__( - self, - env: Ur5e = Ur5e(), - n_envs: int = 1, - contexts: Dict[str, Dict] = {}, - hide_context=False, - add_gaussian_noise_to_context: bool = False, - gaussian_noise_std_percentage: float = 0.01, - logger: Optional[TrialLogger] = None, - scale_context_features: str = "no", - default_context: Optional[Dict] = DEFAULT_CONTEXT, - state_context_features: Optional[List[str]] = None, - context_mask: Optional[List[str]] = None, - dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, - context_selector_kwargs: Optional[Dict] = None, + self, + env: Ur5e = Ur5e(), + n_envs: int = 1, + contexts: Dict[str, Dict] = {}, + hide_context=False, + add_gaussian_noise_to_context: bool = False, + gaussian_noise_std_percentage: float = 0.01, + logger: Optional[TrialLogger] = None, + scale_context_features: str = "no", + default_context: Optional[Dict] = DEFAULT_CONTEXT, + state_context_features: Optional[List[str]] = None, + context_mask: Optional[List[str]] = None, + dict_observation_space: bool = False, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, + context_selector_kwargs: Optional[Dict] = None, ): if n_envs == 1: env = GymWrapper(env) diff --git a/carl/envs/carl_env.py b/carl/envs/carl_env.py index 592c9588..ea29ddf9 100644 --- a/carl/envs/carl_env.py +++ b/carl/envs/carl_env.py @@ 
-1,23 +1,23 @@ from typing import Any, Dict, List, Optional, Tuple, Union +import importlib +import inspect import json import os -import inspect import gym import numpy as np from gym import Wrapper, spaces from carl.context.augmentation import add_gaussian_noise +from carl.context.selection import AbstractSelector, RoundRobinSelector from carl.context.utils import get_context_bounds from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector, RoundRobinSelector -import importlib brax_spec = importlib.util.find_spec("brax") if brax_spec is not None: - import jaxlib import jax.numpy as jnp + import jaxlib class CARLEnv(Wrapper): @@ -98,7 +98,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): super().__init__(env=env) @@ -118,15 +120,19 @@ def __init__( self.context_selector = RoundRobinSelector(contexts=contexts) elif isinstance(context_selector, AbstractSelector): self.context_selector = context_selector - elif inspect.isclass(context_selector) and issubclass(context_selector, AbstractSelector): + elif inspect.isclass(context_selector) and issubclass( + context_selector, AbstractSelector + ): if context_selector_kwargs is None: context_selector_kwargs = {} _context_selector_kwargs = {"contexts": contexts} context_selector_kwargs.update(_context_selector_kwargs) self.context_selector = context_selector(**context_selector_kwargs) else: - raise ValueError(f"Context selector must be None or an AbstractSelector class or instance. " - f"Got type {type(context_selector)}.") + raise ValueError( + f"Context selector must be None or an AbstractSelector class or instance. " + f"Got type {type(context_selector)}." + ) if state_context_features is not None: if ( state_context_features == "changing_context_features" @@ -158,13 +164,17 @@ def __init__( else: state_context_features = [] else: - state_context_features = list(self.contexts[list(self.contexts.keys())[0]].keys()) + state_context_features = list( + self.contexts[list(self.contexts.keys())[0]].keys() + ) self.state_context_features: List[str] = state_context_features # state_context_features contains the names of the context features that should be appended to the state # However, if context_mask is set, we want to update staet_context_feature_names so that the context features # in context_mask are not appended to the state anymore. 
if self.context_mask: - self.state_context_features = [s for s in self.state_context_features if s not in self.context_mask] + self.state_context_features = [ + s for s in self.state_context_features if s not in self.context_mask + ] self.step_counter = 0 # type: int # increased in/after step self.total_timestep_counter = 0 # type: int @@ -224,7 +234,9 @@ def contexts(self) -> Dict[Any, Dict[Any, Any]]: @contexts.setter def contexts(self, contexts: Dict[Any, Dict[Any, Any]]): - self._contexts = {k: self.fill_context_with_default(context=v) for k, v in contexts.items()} + self._contexts = { + k: self.fill_context_with_default(context=v) for k, v in contexts.items() + } def reset(self, **kwargs: Dict) -> Any: """ diff --git a/carl/envs/classic_control/carl_acrobot.py b/carl/envs/classic_control/carl_acrobot.py index 8f90147c..046ad3cf 100644 --- a/carl/envs/classic_control/carl_acrobot.py +++ b/carl/envs/classic_control/carl_acrobot.py @@ -4,9 +4,9 @@ import numpy as np from gym.envs.classic_control import AcrobotEnv +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "link_length_1": 1, # should be seen as 100% default and scaled @@ -74,24 +74,22 @@ def reset( *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None + options: Optional[dict] = None, ): super().reset(seed=seed) low = ( self.INITIAL_ANGLE_LOWER, self.INITIAL_ANGLE_LOWER, self.INITIAL_VELOCITY_LOWER, - self.INITIAL_VELOCITY_LOWER + self.INITIAL_VELOCITY_LOWER, ) high = ( self.INITIAL_ANGLE_UPPER, self.INITIAL_ANGLE_UPPER, self.INITIAL_VELOCITY_UPPER, - self.INITIAL_VELOCITY_UPPER - ) - self.state = self.np_random.uniform(low=low, high=high).astype( - np.float32 + self.INITIAL_VELOCITY_UPPER, ) + self.state = self.np_random.uniform(low=low, high=high).astype(np.float32) if not return_info: return self._get_ob() else: @@ -113,7 +111,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if not contexts: diff --git a/carl/envs/classic_control/carl_cartpole.py b/carl/envs/classic_control/carl_cartpole.py index 68387756..d8ffa0f2 100644 --- a/carl/envs/classic_control/carl_cartpole.py +++ b/carl/envs/classic_control/carl_cartpole.py @@ -4,9 +4,9 @@ import numpy as np from gym.envs.classic_control import CartPoleEnv +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "gravity": 9.8, @@ -49,7 +49,9 @@ def reset( options: Optional[dict] = None, ): super().reset(seed=seed) - self.state = self.np_random.uniform(low=self.initial_state_lower, high=self.initial_state_upper, size=(4,)) + self.state = self.np_random.uniform( + low=self.initial_state_lower, high=self.initial_state_upper, size=(4,) + ) self.steps_beyond_done = None if not return_info: return np.array(self.state, dtype=np.float32) @@ -72,7 +74,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - 
context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if not contexts: diff --git a/carl/envs/classic_control/carl_mountaincar.py b/carl/envs/classic_control/carl_mountaincar.py index ef361183..61d23f3f 100644 --- a/carl/envs/classic_control/carl_mountaincar.py +++ b/carl/envs/classic_control/carl_mountaincar.py @@ -4,9 +4,9 @@ import gym.envs.classic_control as gccenvs import numpy as np +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "min_position": -1.2, # unit? @@ -18,8 +18,8 @@ "gravity": 0.0025, # unit? "min_position_start": -0.6, "max_position_start": -0.4, - "min_velocity_start": 0., - "max_velocity_start": 0., + "min_velocity_start": 0.0, + "max_velocity_start": 0.0, } CONTEXT_BOUNDS = { @@ -42,14 +42,18 @@ def __init__(self, goal_velocity: float = 0.0): super(CustomMountainCarEnv, self).__init__(goal_velocity=goal_velocity) self.min_position_start = -0.6 self.max_position_start = -0.4 - self.min_velocity_start = 0. - self.max_velocity_start = 0. + self.min_velocity_start = 0.0 + self.max_velocity_start = 0.0 def reset_state(self) -> np.ndarray: return np.array( [ - self.np_random.uniform(low=self.min_position_start, high=self.max_position_start), - self.np_random.uniform(low=self.min_velocity_start, high=self.max_velocity_start) + self.np_random.uniform( + low=self.min_position_start, high=self.max_position_start + ), + self.np_random.uniform( + low=self.min_velocity_start, high=self.max_velocity_start + ), ] ) @@ -82,7 +86,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ diff --git a/carl/envs/classic_control/carl_mountaincarcontinuous.py b/carl/envs/classic_control/carl_mountaincarcontinuous.py index 64090360..a620688c 100644 --- a/carl/envs/classic_control/carl_mountaincarcontinuous.py +++ b/carl/envs/classic_control/carl_mountaincarcontinuous.py @@ -4,9 +4,9 @@ import gym.envs.classic_control as gccenvs import numpy as np +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "min_position": -1.2, @@ -77,7 +77,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ diff --git a/carl/envs/classic_control/carl_pendulum.py b/carl/envs/classic_control/carl_pendulum.py index 6b0748aa..926b08b4 100644 --- a/carl/envs/classic_control/carl_pendulum.py +++ b/carl/envs/classic_control/carl_pendulum.py @@ -4,9 +4,9 @@ import gym.envs.classic_control as gccenvs import numpy as np +from carl.context.selection import AbstractSelector from 
carl.envs.carl_env import CARLEnv from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector DEFAULT_CONTEXT = { "max_speed": 8.0, @@ -14,7 +14,6 @@ "g": 10.0, "m": 1.0, "l": 1.0, - "initial_angle_max": np.pi, # Upper bound for uniform distribution to sample from "initial_velocity_max": 1, # Upper bound for uniform distribution to sample from # The lower bound will be the negative value. @@ -26,14 +25,13 @@ "g": (0, np.inf, float), "m": (1e-6, np.inf, float), "l": (1e-6, np.inf, float), - "initial_angle_max": (0, np.inf, float), - "initial_velocity_max": (0, np.inf, float) + "initial_velocity_max": (0, np.inf, float), } class CustomPendulum(gccenvs.pendulum.PendulumEnv): - def __init__(self, g: float = 10.): + def __init__(self, g: float = 10.0): super(CustomPendulum, self).__init__(g=g) self.initial_angle_max = DEFAULT_CONTEXT["initial_angle_max"] self.initial_velocity_max = DEFAULT_CONTEXT["initial_velocity_max"] @@ -43,7 +41,7 @@ def reset( *, seed: Optional[int] = None, return_info: bool = False, - options: Optional[dict] = None + options: Optional[dict] = None, ): super().reset(seed=seed) high = np.array([self.initial_angle_max, self.initial_velocity_max]) @@ -70,7 +68,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): """ diff --git a/carl/envs/dmc/__init__.py b/carl/envs/dmc/__init__.py index e1cfa631..b81d8084 100644 --- a/carl/envs/dmc/__init__.py +++ b/carl/envs/dmc/__init__.py @@ -1,19 +1,13 @@ # flake8: noqa: F401 # Contexts and bounds by name -from carl.envs.dmc.carl_dm_walker import ( - CARLDmcWalkerEnv, - DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults, - CONTEXT_MASK as CARLDmcWalkerEnv_mask, -) - +from carl.envs.dmc.carl_dm_fish import CONTEXT_MASK as CARLDmcFishEnv_mask +from carl.envs.dmc.carl_dm_fish import DEFAULT_CONTEXT as CARLDmcFishEnv_defaults +from carl.envs.dmc.carl_dm_fish import CARLDmcFishEnv +from carl.envs.dmc.carl_dm_quadruped import CONTEXT_MASK as CARLDmcQuadrupedEnv_mask from carl.envs.dmc.carl_dm_quadruped import ( - CARLDmcQuadrupedEnv, DEFAULT_CONTEXT as CARLDmcQuadrupedEnv_defaults, - CONTEXT_MASK as CARLDmcQuadrupedEnv_mask, -) - -from carl.envs.dmc.carl_dm_fish import ( - CARLDmcFishEnv, - DEFAULT_CONTEXT as CARLDmcFishEnv_defaults, - CONTEXT_MASK as CARLDmcFishEnv_mask, ) +from carl.envs.dmc.carl_dm_quadruped import CARLDmcQuadrupedEnv +from carl.envs.dmc.carl_dm_walker import CONTEXT_MASK as CARLDmcWalkerEnv_mask +from carl.envs.dmc.carl_dm_walker import DEFAULT_CONTEXT as CARLDmcWalkerEnv_defaults +from carl.envs.dmc.carl_dm_walker import CARLDmcWalkerEnv diff --git a/carl/envs/dmc/carl_dm_fish.py b/carl/envs/dmc/carl_dm_fish.py index a50b3a65..89940b41 100644 --- a/carl/envs/dmc/carl_dm_fish.py +++ b/carl/envs/dmc/carl_dm_fish.py @@ -2,11 +2,10 @@ import numpy as np -from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv from carl.envs.dmc.dmc_tasks.fish import STEP_LIMIT - +from carl.utils.trial_logger import TrialLogger DEFAULT_CONTEXT = { "gravity": -9.81, # Gravity is disabled via flag @@ -14,15 +13,15 @@ "friction_torsional": 1, # Scaling factor for torsional friction of all geoms 
(objects) "friction_rolling": 1, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.004, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints - "joint_stiffness": 0., + "joint_damping": 1.0, # Scaling factor for all joints + "joint_stiffness": 0.0, "actuator_strength": 1, # Scaling factor for all actuators in the model - "density": 5000., - "viscosity": 0., - "geom_density": 1., # No effect, because no gravity - "wind_x": 0., - "wind_y": 0., - "wind_z": 0., + "density": 5000.0, + "viscosity": 0.0, + "geom_density": 1.0, # No effect, because no gravity + "wind_x": 0.0, + "wind_y": 0.0, + "wind_z": 0.0, } CONTEXT_BOUNDS = { @@ -30,7 +29,11 @@ "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), - "timestep": (0.001, 0.1, float,), + "timestep": ( + 0.001, + 0.1, + float, + ), "joint_damping": (0, np.inf, float), "joint_stiffness": (0, np.inf, float), "actuator_strength": (0, np.inf, float), @@ -67,7 +70,9 @@ def __init__( max_episode_length: int = STEP_LIMIT, state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): super().__init__( diff --git a/carl/envs/dmc/carl_dm_quadruped.py b/carl/envs/dmc/carl_dm_quadruped.py index 31f84021..3e9aa152 100644 --- a/carl/envs/dmc/carl_dm_quadruped.py +++ b/carl/envs/dmc/carl_dm_quadruped.py @@ -2,27 +2,26 @@ import numpy as np -from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv from carl.envs.dmc.dmc_tasks.quadruped import STEP_LIMIT - +from carl.utils.trial_logger import TrialLogger DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) + "friction_tangential": 1.0, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1.0, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1.0, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.005, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints - "joint_stiffness": 0., + "joint_damping": 1.0, # Scaling factor for all joints + "joint_stiffness": 0.0, "actuator_strength": 1, # Scaling factor for all actuators in the model - "density": 0., - "viscosity": 0., - "geom_density": 1., # Scaling factor for all geom (objects) densities - "wind_x": 0., - "wind_y": 0., - "wind_z": 0., + "density": 0.0, + "viscosity": 0.0, + "geom_density": 1.0, # Scaling factor for all geom (objects) densities + "wind_x": 0.0, + "wind_y": 0.0, + "wind_z": 0.0, } CONTEXT_BOUNDS = { @@ -30,7 +29,11 @@ "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), - "timestep": (0.001, 0.1, float,), + "timestep": ( + 0.001, + 0.1, + float, + ), "joint_damping": (0, np.inf, float), "joint_stiffness": (0, np.inf, float), "actuator_strength": (0, np.inf, float), @@ -65,7 +68,9 @@ def __init__( max_episode_length: int = STEP_LIMIT, 
state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): super().__init__( diff --git a/carl/envs/dmc/carl_dm_walker.py b/carl/envs/dmc/carl_dm_walker.py index 5edcf30a..90fe5811 100644 --- a/carl/envs/dmc/carl_dm_walker.py +++ b/carl/envs/dmc/carl_dm_walker.py @@ -2,27 +2,26 @@ import numpy as np -from carl.utils.trial_logger import TrialLogger from carl.context.selection import AbstractSelector from carl.envs.dmc.carl_dmcontrol import CARLDmcEnv from carl.envs.dmc.dmc_tasks.walker import STEP_LIMIT - +from carl.utils.trial_logger import TrialLogger DEFAULT_CONTEXT = { "gravity": -9.81, - "friction_tangential": 1., # Scaling factor for tangential friction of all geoms (objects) - "friction_torsional": 1., # Scaling factor for torsional friction of all geoms (objects) - "friction_rolling": 1., # Scaling factor for rolling friction of all geoms (objects) + "friction_tangential": 1.0, # Scaling factor for tangential friction of all geoms (objects) + "friction_torsional": 1.0, # Scaling factor for torsional friction of all geoms (objects) + "friction_rolling": 1.0, # Scaling factor for rolling friction of all geoms (objects) "timestep": 0.0025, # Seconds between updates - "joint_damping": 1., # Scaling factor for all joints - "joint_stiffness": 0., - "actuator_strength": 1., # Scaling factor for all actuators in the model - "density": 0., - "viscosity": 0., - "geom_density": 1., # Scaling factor for all geom (objects) densities - "wind_x": 0., - "wind_y": 0., - "wind_z": 0., + "joint_damping": 1.0, # Scaling factor for all joints + "joint_stiffness": 0.0, + "actuator_strength": 1.0, # Scaling factor for all actuators in the model + "density": 0.0, + "viscosity": 0.0, + "geom_density": 1.0, # Scaling factor for all geom (objects) densities + "wind_x": 0.0, + "wind_y": 0.0, + "wind_z": 0.0, } CONTEXT_BOUNDS = { @@ -30,7 +29,11 @@ "friction_tangential": (0, np.inf, float), "friction_torsional": (0, np.inf, float), "friction_rolling": (0, np.inf, float), - "timestep": (0.001, 0.1, float,), + "timestep": ( + 0.001, + 0.1, + float, + ), "joint_damping": (0, np.inf, float), "joint_stiffness": (0, np.inf, float), "actuator_strength": (0, np.inf, float), @@ -65,7 +68,9 @@ def __init__( max_episode_length: int = STEP_LIMIT, state_context_features: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): super().__init__( diff --git a/carl/envs/dmc/carl_dmcontrol.py b/carl/envs/dmc/carl_dmcontrol.py index f6359a9d..054b0fcd 100644 --- a/carl/envs/dmc/carl_dmcontrol.py +++ b/carl/envs/dmc/carl_dmcontrol.py @@ -1,10 +1,10 @@ -from typing import Any, Dict, List, Union, Optional +from typing import Any, Dict, List, Optional, Union +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv -from carl.envs.dmc.wrappers import MujocoToGymWrapper from carl.envs.dmc.loader import load_dmc_env +from carl.envs.dmc.wrappers import MujocoToGymWrapper from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector class CARLDmcEnv(CARLEnv): @@ -27,6 +27,7 @@ 
class CARLDmcEnv(CARLEnv): NotImplementedError Dict observation spaces are not implemented for dm-control yet. """ + def __init__( self, domain: str, @@ -55,7 +56,7 @@ def __init__( task_name=self.task, context={}, context_mask=[], - environment_kwargs={"flat_observation": True} + environment_kwargs={"flat_observation": True}, ) env = MujocoToGymWrapper(env) @@ -86,6 +87,6 @@ def _update_context(self) -> None: task_name=self.task, context=self.context, context_mask=self.context_mask, - environment_kwargs={"flat_observation": True} + environment_kwargs={"flat_observation": True}, ) self.env = MujocoToGymWrapper(env) diff --git a/carl/envs/dmc/dmc_tasks/fish.py b/carl/envs/dmc/dmc_tasks/fish.py index 47dc6d25..30092006 100644 --- a/carl/envs/dmc/dmc_tasks/fish.py +++ b/carl/envs/dmc/dmc_tasks/fish.py @@ -17,61 +17,84 @@ import collections +import numpy as np from dm_control import mujoco from dm_control.rl import control -from dm_control.suite import base -from dm_control.suite import common -from dm_control.utils import containers -from dm_control.utils import rewards -import numpy as np -from carl.envs.dmc.dmc_tasks.utils import adapt_context +from dm_control.suite import base, common +from dm_control.utils import containers, rewards +from carl.envs.dmc.dmc_tasks.utils import adapt_context _DEFAULT_TIME_LIMIT = 40 -_CONTROL_TIMESTEP = .04 +_CONTROL_TIMESTEP = 0.04 STEP_LIMIT = 1000 -_JOINTS = ['tail1', - 'tail_twist', - 'tail2', - 'finright_roll', - 'finright_pitch', - 'finleft_roll', - 'finleft_pitch'] +_JOINTS = [ + "tail1", + "tail_twist", + "tail2", + "finright_roll", + "finright_pitch", + "finleft_roll", + "finleft_pitch", +] SUITE = containers.TaggedTasks() def get_model_and_assets(): """Returns a tuple containing the model XML string and a dict of assets.""" - return common.read_model('fish.xml'), common.ASSETS + return common.read_model("fish.xml"), common.ASSETS -@SUITE.add('benchmarking') -def upright_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, - environment_kwargs=None): +@SUITE.add("benchmarking") +def upright_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Fish Upright task.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, assets) task = Upright(random=random) environment_kwargs = environment_kwargs or {} return control.Environment( - physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, - **environment_kwargs) - - -@SUITE.add('benchmarking') -def swim_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + physics, + task, + control_timestep=_CONTROL_TIMESTEP, + time_limit=time_limit, + **environment_kwargs, + ) + + +@SUITE.add("benchmarking") +def swim_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Fish Swim task.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, assets) task = 
Swim(random=random) environment_kwargs = environment_kwargs or {} return control.Environment( - physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, - **environment_kwargs) + physics, + task, + control_timestep=_CONTROL_TIMESTEP, + time_limit=time_limit, + **environment_kwargs, + ) class Physics(mujoco.Physics): @@ -79,7 +102,7 @@ class Physics(mujoco.Physics): def upright(self): """Returns projection from z-axes of torso to the z-axes of worldbody.""" - return self.named.data.xmat['torso', 'zz'] + return self.named.data.xmat["torso", "zz"] def torso_velocity(self): """Returns velocities and angular velocities of the torso.""" @@ -96,8 +119,8 @@ def joint_angles(self): def mouth_to_target(self): """Returns a vector, from mouth to target in local coordinate of mouth.""" data = self.named.data - mouth_to_target_global = data.geom_xpos['target'] - data.geom_xpos['mouth'] - return mouth_to_target_global.dot(data.geom_xmat['mouth'].reshape(3, 3)) + mouth_to_target_global = data.geom_xpos["target"] - data.geom_xpos["mouth"] + return mouth_to_target_global.dot(data.geom_xmat["mouth"].reshape(3, 3)) class Upright(base.Task): @@ -115,19 +138,19 @@ def __init__(self, random=None): def initialize_episode(self, physics): """Randomizes the tail and fin angles and the orientation of the Fish.""" quat = self.random.randn(4) - physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + physics.named.data.qpos["root"][3:7] = quat / np.linalg.norm(quat) for joint in _JOINTS: - physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + physics.named.data.qpos[joint] = self.random.uniform(-0.2, 0.2) # Hide the target. It's irrelevant for this task. - physics.named.model.geom_rgba['target', 3] = 0 + physics.named.model.geom_rgba["target", 3] = 0 super().initialize_episode(physics) def get_observation(self, physics): """Returns an observation of joint angles, velocities and uprightness.""" obs = collections.OrderedDict() - obs['joint_angles'] = physics.joint_angles() - obs['upright'] = physics.upright() - obs['velocity'] = physics.velocity() + obs["joint_angles"] = physics.joint_angles() + obs["upright"] = physics.upright() + obs["velocity"] = physics.velocity() return obs def get_reward(self, physics): @@ -151,28 +174,31 @@ def initialize_episode(self, physics): """Sets the state of the environment at the start of each episode.""" quat = self.random.randn(4) - physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + physics.named.data.qpos["root"][3:7] = quat / np.linalg.norm(quat) for joint in _JOINTS: - physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + physics.named.data.qpos[joint] = self.random.uniform(-0.2, 0.2) # Randomize target position. 
- physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) - physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) - physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) + physics.named.model.geom_pos["target", "x"] = self.random.uniform(-0.4, 0.4) + physics.named.model.geom_pos["target", "y"] = self.random.uniform(-0.4, 0.4) + physics.named.model.geom_pos["target", "z"] = self.random.uniform(0.1, 0.3) super().initialize_episode(physics) def get_observation(self, physics): """Returns an observation of joints, target direction and velocities.""" obs = collections.OrderedDict() - obs['joint_angles'] = physics.joint_angles() - obs['upright'] = physics.upright() - obs['target'] = physics.mouth_to_target() - obs['velocity'] = physics.velocity() + obs["joint_angles"] = physics.joint_angles() + obs["upright"] = physics.upright() + obs["target"] = physics.mouth_to_target() + obs["velocity"] = physics.velocity() return obs def get_reward(self, physics): """Returns a smooth reward.""" - radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() - in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), - bounds=(0, radii), margin=2*radii) + radii = physics.named.model.geom_size[["mouth", "target"], 0].sum() + in_target = rewards.tolerance( + np.linalg.norm(physics.mouth_to_target()), + bounds=(0, radii), + margin=2 * radii, + ) is_upright = 0.5 * (physics.upright() + 1) - return (7*in_target + is_upright) / 8 + return (7 * in_target + is_upright) / 8 diff --git a/carl/envs/dmc/dmc_tasks/quadruped.py b/carl/envs/dmc/dmc_tasks/quadruped.py index ed1bf5ad..cfab004c 100644 --- a/carl/envs/dmc/dmc_tasks/quadruped.py +++ b/carl/envs/dmc/dmc_tasks/quadruped.py @@ -17,25 +17,23 @@ import collections +import numpy as np from dm_control import mujoco from dm_control.mujoco.wrapper import mjbindings from dm_control.rl import control -from dm_control.suite import base -from dm_control.suite import common -from dm_control.utils import containers -from dm_control.utils import rewards -from dm_control.utils import xml_tools -from carl.envs.dmc.dmc_tasks.utils import adapt_context +from dm_control.suite import base, common +from dm_control.utils import containers, rewards, xml_tools from lxml import etree -import numpy as np from scipy import ndimage +from carl.envs.dmc.dmc_tasks.utils import adapt_context + enums = mjbindings.enums mjlib = mjbindings.mjlib _DEFAULT_TIME_LIMIT = 20 -_CONTROL_TIMESTEP = .02 +_CONTROL_TIMESTEP = 0.02 STEP_LIMIT = 1000 # Horizontal speeds above which the move reward is 1. @@ -48,47 +46,48 @@ _TERRAIN_BUMP_SCALE = 2 # Spatial scale of terrain bumps (in meters). # Named model elements. -_TOES = ['toe_front_left', 'toe_back_left', 'toe_back_right', 'toe_front_right'] -_WALLS = ['wall_px', 'wall_py', 'wall_nx', 'wall_ny'] +_TOES = ["toe_front_left", "toe_back_left", "toe_back_right", "toe_front_right"] +_WALLS = ["wall_px", "wall_py", "wall_nx", "wall_ny"] SUITE = containers.TaggedTasks() -def make_model(floor_size=None, terrain=False, rangefinders=False, - walls_and_ball=False): +def make_model( + floor_size=None, terrain=False, rangefinders=False, walls_and_ball=False +): """Returns the model XML string.""" - xml_string = common.read_model('quadruped.xml') + xml_string = common.read_model("quadruped.xml") parser = etree.XMLParser(remove_blank_text=True) mjcf = etree.XML(xml_string, parser) # Set floor size. 
if floor_size is not None: - floor_geom = mjcf.find('.//geom[@name=\'floor\']') - floor_geom.attrib['size'] = f'{floor_size} {floor_size} .5' + floor_geom = mjcf.find(".//geom[@name='floor']") + floor_geom.attrib["size"] = f"{floor_size} {floor_size} .5" # Remove walls, ball and target. if not walls_and_ball: for wall in _WALLS: - wall_geom = xml_tools.find_element(mjcf, 'geom', wall) + wall_geom = xml_tools.find_element(mjcf, "geom", wall) wall_geom.getparent().remove(wall_geom) # Remove ball. - ball_body = xml_tools.find_element(mjcf, 'body', 'ball') + ball_body = xml_tools.find_element(mjcf, "body", "ball") ball_body.getparent().remove(ball_body) # Remove target. - target_site = xml_tools.find_element(mjcf, 'site', 'target') + target_site = xml_tools.find_element(mjcf, "site", "target") target_site.getparent().remove(target_site) # Remove terrain. if not terrain: - terrain_geom = xml_tools.find_element(mjcf, 'geom', 'terrain') + terrain_geom = xml_tools.find_element(mjcf, "geom", "terrain") terrain_geom.getparent().remove(terrain_geom) # Remove rangefinders if they're not used, as range computations can be # expensive, especially in a scene with heightfields. if not rangefinders: - rangefinder_sensors = mjcf.findall('.//rangefinder') + rangefinder_sensors = mjcf.findall(".//rangefinder") for rf in rangefinder_sensors: rf.getparent().remove(rf) @@ -96,60 +95,107 @@ def make_model(floor_size=None, terrain=False, rangefinders=False, @SUITE.add() -def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def walk_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Walk task with the adapted context.""" xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _WALK_SPEED) if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Move(desired_speed=_WALK_SPEED, random=random) environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + return control.Environment( + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) @SUITE.add() -def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def run_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Run task with the adapted context.""" xml_string = make_model(floor_size=_DEFAULT_TIME_LIMIT * _RUN_SPEED) if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Move(desired_speed=_RUN_SPEED, random=random) environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + return control.Environment( + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) @SUITE.add() -def 
escape_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, - environment_kwargs=None): +def escape_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Escape task with the adapted context.""" xml_string = make_model(floor_size=40, terrain=True, rangefinders=True) if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Escape(random=random) environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + return control.Environment( + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) @SUITE.add() -def fetch_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +def fetch_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Fetch task with the adapted context.""" xml_string = make_model(walls_and_ball=True) if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, common.ASSETS) task = Fetch(random=random) environment_kwargs = environment_kwargs or {} - return control.Environment(physics, task, time_limit=time_limit, - control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + return control.Environment( + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) class Physics(mujoco.Physics): @@ -166,46 +212,52 @@ def _get_sensor_names(self, *sensor_types): sensor_names = self._sensor_types_to_names[sensor_types] except KeyError: [sensor_ids] = np.where(np.in1d(self.model.sensor_type, sensor_types)) - sensor_names = [self.model.id2name(s_id, 'sensor') for s_id in sensor_ids] + sensor_names = [self.model.id2name(s_id, "sensor") for s_id in sensor_ids] self._sensor_types_to_names[sensor_types] = sensor_names return sensor_names def torso_upright(self): """Returns the dot-product of the torso z-axis and the global z-axis.""" - return np.asarray(self.named.data.xmat['torso', 'zz']) + return np.asarray(self.named.data.xmat["torso", "zz"]) def torso_velocity(self): """Returns the velocity of the torso, in the local frame.""" - return self.named.data.sensordata['velocimeter'].copy() + return self.named.data.sensordata["velocimeter"].copy() def egocentric_state(self): """Returns the state without global orientation or position.""" if not self._hinge_names: - [hinge_ids] = np.nonzero(self.model.jnt_type == - enums.mjtJoint.mjJNT_HINGE) - self._hinge_names = [self.model.id2name(j_id, 'joint') - for j_id in hinge_ids] - return np.hstack((self.named.data.qpos[self._hinge_names], - self.named.data.qvel[self._hinge_names], - self.data.act)) + [hinge_ids] = np.nonzero(self.model.jnt_type == enums.mjtJoint.mjJNT_HINGE) + self._hinge_names = [ + self.model.id2name(j_id, "joint") for j_id in hinge_ids + ] + return np.hstack( + ( + self.named.data.qpos[self._hinge_names], + self.named.data.qvel[self._hinge_names], + 
self.data.act, + ) + ) def toe_positions(self): """Returns toe positions in egocentric frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] + torso_frame = self.named.data.xmat["torso"].reshape(3, 3) + torso_pos = self.named.data.xpos["torso"] torso_to_toe = self.named.data.xpos[_TOES] - torso_pos return torso_to_toe.dot(torso_frame) def force_torque(self): """Returns scaled force/torque sensor readings at the toes.""" - force_torque_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_FORCE, - enums.mjtSensor.mjSENS_TORQUE) + force_torque_sensors = self._get_sensor_names( + enums.mjtSensor.mjSENS_FORCE, enums.mjtSensor.mjSENS_TORQUE + ) return np.arcsinh(self.named.data.sensordata[force_torque_sensors]) def imu(self): """Returns IMU-like sensor readings.""" - imu_sensors = self._get_sensor_names(enums.mjtSensor.mjSENS_GYRO, - enums.mjtSensor.mjSENS_ACCELEROMETER) + imu_sensors = self._get_sensor_names( + enums.mjtSensor.mjSENS_GYRO, enums.mjtSensor.mjSENS_ACCELEROMETER + ) return self.named.data.sensordata[imu_sensors] def rangefinder(self): @@ -217,41 +269,43 @@ def rangefinder(self): def origin_distance(self): """Returns the distance from the origin to the workspace.""" - return np.asarray(np.linalg.norm(self.named.data.site_xpos['workspace'])) + return np.asarray(np.linalg.norm(self.named.data.site_xpos["workspace"])) def origin(self): """Returns origin position in the torso frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] + torso_frame = self.named.data.xmat["torso"].reshape(3, 3) + torso_pos = self.named.data.xpos["torso"] return -torso_pos.dot(torso_frame) def ball_state(self): """Returns ball position and velocity relative to the torso frame.""" data = self.named.data - torso_frame = data.xmat['torso'].reshape(3, 3) - ball_rel_pos = data.xpos['ball'] - data.xpos['torso'] - ball_rel_vel = data.qvel['ball_root'][:3] - data.qvel['root'][:3] - ball_rot_vel = data.qvel['ball_root'][3:] + torso_frame = data.xmat["torso"].reshape(3, 3) + ball_rel_pos = data.xpos["ball"] - data.xpos["torso"] + ball_rel_vel = data.qvel["ball_root"][:3] - data.qvel["root"][:3] + ball_rot_vel = data.qvel["ball_root"][3:] ball_state = np.vstack((ball_rel_pos, ball_rel_vel, ball_rot_vel)) return ball_state.dot(torso_frame).ravel() def target_position(self): """Returns target position in torso frame.""" - torso_frame = self.named.data.xmat['torso'].reshape(3, 3) - torso_pos = self.named.data.xpos['torso'] - torso_to_target = self.named.data.site_xpos['target'] - torso_pos + torso_frame = self.named.data.xmat["torso"].reshape(3, 3) + torso_pos = self.named.data.xpos["torso"] + torso_to_target = self.named.data.site_xpos["target"] - torso_pos return torso_to_target.dot(torso_frame) def ball_to_target_distance(self): """Returns horizontal distance from the ball to the target.""" - ball_to_target = (self.named.data.site_xpos['target'] - - self.named.data.xpos['ball']) + ball_to_target = ( + self.named.data.site_xpos["target"] - self.named.data.xpos["ball"] + ) return np.linalg.norm(ball_to_target[:2]) def self_to_ball_distance(self): """Returns horizontal distance from the quadruped workspace to the ball.""" - self_to_ball = (self.named.data.site_xpos['workspace'] - - self.named.data.xpos['ball']) + self_to_ball = ( + self.named.data.site_xpos["workspace"] - self.named.data.xpos["ball"] + ) return np.linalg.norm(self_to_ball[:2]) @@ -273,8 +327,8 @@ def _find_non_contacting_height(physics, 
orientation, x_pos=0.0, y_pos=0.0): while num_contacts > 0: try: with physics.reset_context(): - physics.named.data.qpos['root'][:3] = x_pos, y_pos, z_pos - physics.named.data.qpos['root'][3:] = orientation + physics.named.data.qpos["root"][:3] = x_pos, y_pos, z_pos + physics.named.data.qpos["root"][3:] = orientation except control.PhysicsError: # We may encounter a PhysicsError here due to filling the contact # buffer, in which case we simply increment the height and continue. @@ -283,17 +337,17 @@ def _find_non_contacting_height(physics, orientation, x_pos=0.0, y_pos=0.0): z_pos += 0.01 num_attempts += 1 if num_attempts > 10000: - raise RuntimeError('Failed to find a non-contacting configuration.') + raise RuntimeError("Failed to find a non-contacting configuration.") def _common_observations(physics): """Returns the observations common to all tasks.""" obs = collections.OrderedDict() - obs['egocentric_state'] = physics.egocentric_state() - obs['torso_velocity'] = physics.torso_velocity() - obs['torso_upright'] = physics.torso_upright() - obs['imu'] = physics.imu() - obs['force_torque'] = physics.force_torque() + obs["egocentric_state"] = physics.egocentric_state() + obs["torso_velocity"] = physics.torso_velocity() + obs["torso_upright"] = physics.torso_upright() + obs["imu"] = physics.imu() + obs["force_torque"] = physics.force_torque() return obs @@ -308,10 +362,11 @@ def _upright_reward(physics, deviation_angle=0): deviation = np.cos(np.deg2rad(deviation_angle)) return rewards.tolerance( physics.torso_upright(), - bounds=(deviation, float('inf')), - sigmoid='linear', + bounds=(deviation, float("inf")), + sigmoid="linear", margin=1 + deviation, - value_at_margin=0) + value_at_margin=0, + ) class Move(base.Task): @@ -351,10 +406,11 @@ def get_reward(self, physics): # Move reward term. move_reward = rewards.tolerance( physics.torso_velocity()[0], - bounds=(self._desired_speed, float('inf')), + bounds=(self._desired_speed, float("inf")), margin=self._desired_speed, value_at_margin=0.5, - sigmoid='linear') + sigmoid="linear", + ) return _upright_reward(physics) * move_reward @@ -371,9 +427,9 @@ def initialize_episode(self, physics): res = physics.model.hfield_nrow[_HEIGHTFIELD_ID] assert res == physics.model.hfield_ncol[_HEIGHTFIELD_ID] # Sinusoidal bowl shape. - row_grid, col_grid = np.ogrid[-1:1:res*1j, -1:1:res*1j] - radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), .04, 1) - bowl_shape = .5 - np.cos(2*np.pi*radius)/2 + row_grid, col_grid = np.ogrid[-1 : 1 : res * 1j, -1 : 1 : res * 1j] + radius = np.clip(np.sqrt(col_grid**2 + row_grid**2), 0.04, 1) + bowl_shape = 0.5 - np.cos(2 * np.pi * radius) / 2 # Random smooth bumps. terrain_size = 2 * physics.model.hfield_size[_HEIGHTFIELD_ID, 0] bump_res = int(terrain_size / _TERRAIN_BUMP_SCALE) @@ -382,17 +438,19 @@ def initialize_episode(self, physics): # Terrain is elementwise product. terrain = bowl_shape * smooth_bumps start_idx = physics.model.hfield_adr[_HEIGHTFIELD_ID] - physics.model.hfield_data[start_idx:start_idx+res**2] = terrain.ravel() + physics.model.hfield_data[start_idx : start_idx + res**2] = terrain.ravel() super().initialize_episode(physics) # If we have a rendering context, we need to re-upload the modified # heightfield data. 
if physics.contexts: with physics.contexts.gl.make_current() as ctx: - ctx.call(mjlib.mjr_uploadHField, - physics.model.ptr, - physics.contexts.mujoco.ptr, - _HEIGHTFIELD_ID) + ctx.call( + mjlib.mjr_uploadHField, + physics.model.ptr, + physics.contexts.mujoco.ptr, + _HEIGHTFIELD_ID, + ) # Initial configuration. orientation = self.random.randn(4) @@ -402,8 +460,8 @@ def initialize_episode(self, physics): def get_observation(self, physics): """Returns an observation to the agent.""" obs = _common_observations(physics) - obs['origin'] = physics.origin() - obs['rangefinder'] = physics.rangefinder() + obs["origin"] = physics.origin() + obs["rangefinder"] = physics.rangefinder() return obs def get_reward(self, physics): @@ -413,10 +471,11 @@ def get_reward(self, physics): terrain_size = physics.model.hfield_size[_HEIGHTFIELD_ID, 0] escape_reward = rewards.tolerance( physics.origin_distance(), - bounds=(terrain_size, float('inf')), + bounds=(terrain_size, float("inf")), margin=terrain_size, value_at_margin=0, - sigmoid='linear') + sigmoid="linear", + ) return _upright_reward(physics, deviation_angle=20) * escape_reward @@ -430,47 +489,52 @@ def initialize_episode(self, physics): physics: An instance of `Physics`. """ # Initial configuration, random azimuth and horizontal position. - azimuth = self.random.uniform(0, 2*np.pi) - orientation = np.array((np.cos(azimuth/2), 0, 0, np.sin(azimuth/2))) - spawn_radius = 0.9 * physics.named.model.geom_size['floor', 0] + azimuth = self.random.uniform(0, 2 * np.pi) + orientation = np.array((np.cos(azimuth / 2), 0, 0, np.sin(azimuth / 2))) + spawn_radius = 0.9 * physics.named.model.geom_size["floor", 0] x_pos, y_pos = self.random.uniform(-spawn_radius, spawn_radius, size=(2,)) _find_non_contacting_height(physics, orientation, x_pos, y_pos) # Initial ball state. - physics.named.data.qpos['ball_root'][:2] = self.random.uniform( - -spawn_radius, spawn_radius, size=(2,)) - physics.named.data.qpos['ball_root'][2] = 2 - physics.named.data.qvel['ball_root'][:2] = 5*self.random.randn(2) + physics.named.data.qpos["ball_root"][:2] = self.random.uniform( + -spawn_radius, spawn_radius, size=(2,) + ) + physics.named.data.qpos["ball_root"][2] = 2 + physics.named.data.qvel["ball_root"][:2] = 5 * self.random.randn(2) super().initialize_episode(physics) def get_observation(self, physics): """Returns an observation to the agent.""" obs = _common_observations(physics) - obs['ball_state'] = physics.ball_state() - obs['target_position'] = physics.target_position() + obs["ball_state"] = physics.ball_state() + obs["target_position"] = physics.target_position() return obs def get_reward(self, physics): """Returns a reward to the agent.""" # Reward for moving close to the ball. - arena_radius = physics.named.model.geom_size['floor', 0] * np.sqrt(2) - workspace_radius = physics.named.model.site_size['workspace', 0] - ball_radius = physics.named.model.geom_size['ball', 0] + arena_radius = physics.named.model.geom_size["floor", 0] * np.sqrt(2) + workspace_radius = physics.named.model.site_size["workspace", 0] + ball_radius = physics.named.model.geom_size["ball", 0] reach_reward = rewards.tolerance( physics.self_to_ball_distance(), - bounds=(0, workspace_radius+ball_radius), - sigmoid='linear', - margin=arena_radius, value_at_margin=0) + bounds=(0, workspace_radius + ball_radius), + sigmoid="linear", + margin=arena_radius, + value_at_margin=0, + ) # Reward for bringing the ball to the target. 
- target_radius = physics.named.model.site_size['target', 0] + target_radius = physics.named.model.site_size["target", 0] fetch_reward = rewards.tolerance( physics.ball_to_target_distance(), bounds=(0, target_radius), - sigmoid='linear', - margin=arena_radius, value_at_margin=0) + sigmoid="linear", + margin=arena_radius, + value_at_margin=0, + ) - reach_then_fetch = reach_reward * (0.5 + 0.5*fetch_reward) + reach_then_fetch = reach_reward * (0.5 + 0.5 * fetch_reward) return _upright_reward(physics) * reach_then_fetch diff --git a/carl/envs/dmc/dmc_tasks/utils.py b/carl/envs/dmc/dmc_tasks/utils.py index 54419259..2dd72d4d 100644 --- a/carl/envs/dmc/dmc_tasks/utils.py +++ b/carl/envs/dmc/dmc_tasks/utils.py @@ -13,13 +13,17 @@ def adapt_context(xml_string, context, context_mask=[]): # adjust damping for all joints if damping is already an attribute for joint_find in mjcf.findall(".//joint[@damping]"): joint_damping = joint_find.get("damping") - joint_find.set("damping", str(float(joint_damping) * context["joint_damping"])) + joint_find.set( + "damping", str(float(joint_damping) * context["joint_damping"]) + ) if "joint_stiffness" not in context_mask: # adjust stiffness for all joints if stiffness is already an attribute for joint_find in mjcf.findall(".//joint[@stiffness]"): joint_stiffness = joint_find.get("stiffness") - joint_find.set("stiffness", str(float(joint_stiffness) * context["joint_stiffness"])) + joint_find.set( + "stiffness", str(float(joint_stiffness) * context["joint_stiffness"]) + ) # set default joint damping if default/joint is not present joint = mjcf.find("./default/joint") @@ -28,7 +32,9 @@ def adapt_context(xml_string, context, context_mask=[]): default.addnext(joint) if "joint_damping" not in context_mask: def_joint_damping = 0.1 - default_joint_damping = str(float(def_joint_damping) * context["joint_damping"]) + default_joint_damping = str( + float(def_joint_damping) * context["joint_damping"] + ) joint.set("damping", default_joint_damping) if "joint_stiffness" not in context_mask: default_joint_stiffness = str(context["joint_stiffness"]) @@ -41,9 +47,13 @@ def adapt_context(xml_string, context, context_mask=[]): for i, (f, d) in enumerate( zip( friction, - [context["friction_tangential"], - context["friction_torsional"], - context["friction_rolling"]])): + [ + context["friction_tangential"], + context["friction_torsional"], + context["friction_rolling"], + ], + ) + ): if ( (i == 0 and "friction_tangential" not in context_mask) or (i == 1 and "friction_torsional" not in context_mask) @@ -57,7 +67,10 @@ def adapt_context(xml_string, context, context_mask=[]): if "geom_density" not in context_mask: # adjust density for all geom elements with density attribute for geom_find in mjcf.findall(".//geom[@density]"): - geom_find.set("density", str(float(geom_find.get("density")) * context["geom_density"])) + geom_find.set( + "density", + str(float(geom_find.get("density")) * context["geom_density"]), + ) # create default geom if it does not exist geom = mjcf.find("./default/geom") @@ -67,17 +80,33 @@ def adapt_context(xml_string, context, context_mask=[]): # set default friction if geom.get("friction") is None: - default_friction_tangential = 1. 
+ default_friction_tangential = 1.0 default_friction_torsional = 0.005 default_friction_rolling = 0.0001 - geom.set("friction", " ".join([ - (str(default_friction_tangential * context["friction_tangential"]) - if "friction_tangential" not in context_mask else str(default_friction_tangential)), - (str(default_friction_torsional * context["friction_torsional"]) - if "friction_torsional" not in context_mask else str(default_friction_torsional)), - (str(default_friction_rolling * context["friction_rolling"]) - if "friction_rolling" not in context_mask else str(default_friction_rolling)), - ])) + geom.set( + "friction", + " ".join( + [ + ( + str( + default_friction_tangential * context["friction_tangential"] + ) + if "friction_tangential" not in context_mask + else str(default_friction_tangential) + ), + ( + str(default_friction_torsional * context["friction_torsional"]) + if "friction_torsional" not in context_mask + else str(default_friction_torsional) + ), + ( + str(default_friction_rolling * context["friction_rolling"]) + if "friction_rolling" not in context_mask + else str(default_friction_rolling) + ), + ] + ), + ) if "geom_density" not in context_mask: # set default density @@ -114,17 +143,21 @@ def adapt_context(xml_string, context, context_mask=[]): wind = option.get("wind") if wind is not None: w = wind.split(" ") - wind = " ".join([ - (str(context["wind_x"]) if "wind_x" not in context_mask else w[0]), - (str(context["wind_y"]) if "wind_y" not in context_mask else w[1]), - (str(context["wind_z"]) if "wind_z" not in context_mask else w[2]), - ]) + wind = " ".join( + [ + (str(context["wind_x"]) if "wind_x" not in context_mask else w[0]), + (str(context["wind_y"]) if "wind_y" not in context_mask else w[1]), + (str(context["wind_z"]) if "wind_z" not in context_mask else w[2]), + ] + ) else: - wind = " ".join([ - (str(context["wind_x"]) if "wind_x" not in context_mask else "0"), - (str(context["wind_y"]) if "wind_y" not in context_mask else "0"), - (str(context["wind_z"]) if "wind_z" not in context_mask else "0"), - ]) + wind = " ".join( + [ + (str(context["wind_x"]) if "wind_x" not in context_mask else "0"), + (str(context["wind_y"]) if "wind_y" not in context_mask else "0"), + (str(context["wind_z"]) if "wind_z" not in context_mask else "0"), + ] + ) option.set("wind", wind) if "timestep" not in context_mask: diff --git a/carl/envs/dmc/dmc_tasks/walker.py b/carl/envs/dmc/dmc_tasks/walker.py index 3c32bda5..9f75142b 100644 --- a/carl/envs/dmc/dmc_tasks/walker.py +++ b/carl/envs/dmc/dmc_tasks/walker.py @@ -19,16 +19,14 @@ from dm_control import mujoco from dm_control.rl import control -from dm_control.suite import base -from dm_control.suite import common +from dm_control.suite import base, common from dm_control.suite.utils import randomizers -from dm_control.utils import containers -from dm_control.utils import rewards -from carl.envs.dmc.dmc_tasks.utils import adapt_context +from dm_control.utils import containers, rewards +from carl.envs.dmc.dmc_tasks.utils import adapt_context _DEFAULT_TIME_LIMIT = 25 -_CONTROL_TIMESTEP = .025 +_CONTROL_TIMESTEP = 0.025 # Minimal height of torso over foot above which stand reward is 1. 
_STAND_HEIGHT = 1.2 @@ -45,49 +43,85 @@ def get_model_and_assets(): """Returns a tuple containing the model XML string and a dict of assets.""" - return common.read_model('walker.xml'), common.ASSETS + return common.read_model("walker.xml"), common.ASSETS -@SUITE.add('benchmarking') -def stand_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): +@SUITE.add("benchmarking") +def stand_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Stand task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=0, random=random) environment_kwargs = environment_kwargs or {} return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) - - -@SUITE.add('benchmarking') -def walk_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) + + +@SUITE.add("benchmarking") +def walk_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Walk task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=_WALK_SPEED, random=random) environment_kwargs = environment_kwargs or {} return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) - - -@SUITE.add('benchmarking') -def run_context(context={}, context_mask=[], time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) + + +@SUITE.add("benchmarking") +def run_context( + context={}, + context_mask=[], + time_limit=_DEFAULT_TIME_LIMIT, + random=None, + environment_kwargs=None, +): """Returns the Run task with the adapted context.""" xml_string, assets = get_model_and_assets() if context != {}: - xml_string = adapt_context(xml_string=xml_string, context=context, context_mask=context_mask) + xml_string = adapt_context( + xml_string=xml_string, context=context, context_mask=context_mask + ) physics = Physics.from_xml_string(xml_string, assets) task = PlanarWalker(move_speed=_RUN_SPEED, random=random) environment_kwargs = environment_kwargs or {} return control.Environment( - physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, - **environment_kwargs) + physics, + task, + time_limit=time_limit, + control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs, + ) class Physics(mujoco.Physics): @@ -95,19 +129,19 @@ class Physics(mujoco.Physics): def torso_upright(self): """Returns projection from z-axes of torso to the z-axes of world.""" - return self.named.data.xmat['torso', 'zz'] + return 
self.named.data.xmat["torso", "zz"] def torso_height(self): """Returns the height of the torso.""" - return self.named.data.xpos['torso', 'z'] + return self.named.data.xpos["torso", "z"] def horizontal_velocity(self): """Returns the horizontal velocity of the center-of-mass.""" - return self.named.data.sensordata['torso_subtreelinvel'][0] + return self.named.data.sensordata["torso_subtreelinvel"][0] def orientations(self): """Returns planar orientations of all bodies.""" - return self.named.data.xmat[1:, ['xx', 'xz']].ravel() + return self.named.data.xmat[1:, ["xx", "xz"]].ravel() class PlanarWalker(base.Task): @@ -139,24 +173,28 @@ def initialize_episode(self, physics): def get_observation(self, physics): """Returns an observation of body orientations, height and velocites.""" obs = collections.OrderedDict() - obs['orientations'] = physics.orientations() - obs['height'] = physics.torso_height() - obs['velocity'] = physics.velocity() + obs["orientations"] = physics.orientations() + obs["height"] = physics.torso_height() + obs["velocity"] = physics.velocity() return obs def get_reward(self, physics): """Returns a reward to the agent.""" - standing = rewards.tolerance(physics.torso_height(), - bounds=(_STAND_HEIGHT, float('inf')), - margin=_STAND_HEIGHT/2) + standing = rewards.tolerance( + physics.torso_height(), + bounds=(_STAND_HEIGHT, float("inf")), + margin=_STAND_HEIGHT / 2, + ) upright = (1 + physics.torso_upright()) / 2 - stand_reward = (3*standing + upright) / 4 + stand_reward = (3 * standing + upright) / 4 if self._move_speed == 0: return stand_reward else: - move_reward = rewards.tolerance(physics.horizontal_velocity(), - bounds=(self._move_speed, float('inf')), - margin=self._move_speed/2, - value_at_margin=0.5, - sigmoid='linear') - return stand_reward * (5*move_reward + 1) / 6 + move_reward = rewards.tolerance( + physics.horizontal_velocity(), + bounds=(self._move_speed, float("inf")), + margin=self._move_speed / 2, + value_at_margin=0.5, + sigmoid="linear", + ) + return stand_reward * (5 * move_reward + 1) / 6 diff --git a/carl/envs/dmc/loader.py b/carl/envs/dmc/loader.py index 4935425d..2d8bb033 100644 --- a/carl/envs/dmc/loader.py +++ b/carl/envs/dmc/loader.py @@ -2,27 +2,39 @@ from dm_control import suite -from carl.envs.dmc.dmc_tasks import walker, quadruped, fish # noqa: F401 +from carl.envs.dmc.dmc_tasks import fish, quadruped, walker # noqa: F401 -_DOMAINS = {name: module for name, module in locals().items() - if inspect.ismodule(module) and hasattr(module, 'SUITE')} +_DOMAINS = { + name: module + for name, module in locals().items() + if inspect.ismodule(module) and hasattr(module, "SUITE") +} -def load_dmc_env(domain_name, task_name, context={}, context_mask=[], task_kwargs=None, environment_kwargs=None, - visualize_reward=False): +def load_dmc_env( + domain_name, + task_name, + context={}, + context_mask=[], + task_kwargs=None, + environment_kwargs=None, + visualize_reward=False, +): if domain_name in _DOMAINS: domain = _DOMAINS[domain_name] elif domain_name in suite._DOMAINS: domain = suite._DOMAINS[domain_name] else: - raise ValueError('Domain {!r} does not exist.'.format(domain_name)) + raise ValueError("Domain {!r} does not exist.".format(domain_name)) if task_name in domain.SUITE: task_kwargs = task_kwargs or {} if environment_kwargs is not None: task_kwargs = dict(task_kwargs, environment_kwargs=environment_kwargs) - env = domain.SUITE[task_name](context=context, context_mask=context_mask, **task_kwargs) + env = domain.SUITE[task_name]( + context=context, 
context_mask=context_mask, **task_kwargs + ) env.task.visualize_reward = visualize_reward return env elif (domain_name, task_name) in suite.ALL_TASKS: @@ -34,5 +46,6 @@ def load_dmc_env(domain_name, task_name, context={}, context_mask=[], task_kwarg visualize_reward=visualize_reward, ) else: - raise ValueError('Task {!r} does not exist in domain {!r}.'.format( - task_name, domain_name)) + raise ValueError( + "Task {!r} does not exist in domain {!r}.".format(task_name, domain_name) + ) diff --git a/carl/envs/dmc/try_dm_control.py b/carl/envs/dmc/try_dm_control.py index ed026942..39c94b30 100644 --- a/carl/envs/dmc/try_dm_control.py +++ b/carl/envs/dmc/try_dm_control.py @@ -1,21 +1,28 @@ # flake8: noqa: F401 -from carl.envs import CARLDmcWalkerEnv -from carl.envs import CARLDmcQuadrupedEnv +import matplotlib.pyplot as plt + from carl.envs import CARLDmcFishEnv -from carl.envs import CARLDmcWalkerEnv_defaults as walker_default -from carl.envs import CARLDmcQuadrupedEnv_defaults as quadruped_default from carl.envs import CARLDmcFishEnv_defaults as fish_default -from carl.envs import CARLDmcWalkerEnv_mask as walker_mask -from carl.envs import CARLDmcQuadrupedEnv_mask as quadruped_mask from carl.envs import CARLDmcFishEnv_mask as fish_mask -import matplotlib.pyplot as plt +from carl.envs import CARLDmcQuadrupedEnv +from carl.envs import CARLDmcQuadrupedEnv_defaults as quadruped_default +from carl.envs import CARLDmcQuadrupedEnv_mask as quadruped_mask +from carl.envs import CARLDmcWalkerEnv +from carl.envs import CARLDmcWalkerEnv_defaults as walker_default +from carl.envs import CARLDmcWalkerEnv_mask as walker_mask if __name__ == "__main__": # Load one task: stronger_act = walker_default.copy() - stronger_act["actuator_strength"] = walker_default["actuator_strength"]*2 + stronger_act["actuator_strength"] = walker_default["actuator_strength"] * 2 contexts = {0: stronger_act} - carl_env = CARLDmcWalkerEnv(task="stand_context", contexts=contexts, context_mask=walker_mask, hide_context=False, dict_observation_space=True) + carl_env = CARLDmcWalkerEnv( + task="stand_context", + contexts=contexts, + context_mask=walker_mask, + hide_context=False, + dict_observation_space=True, + ) # stronger_act = quadruped_default.copy() # stronger_act["actuator_strength"] = quadruped_default["actuator_strength"]*2 @@ -25,7 +32,7 @@ # contexts = {0: fish_default} # carl_env = CARLDmcFishEnv(task="swim_context", contexts=contexts, context_mask=fish_mask, hide_context=False) - render = lambda : plt.imshow(carl_env.render(mode='rgb_array')) + render = lambda: plt.imshow(carl_env.render(mode="rgb_array")) s = carl_env.reset() render() # plt.savefig("dm_render.png") diff --git a/carl/envs/dmc/wrappers.py b/carl/envs/dmc/wrappers.py index c377be71..79586ffe 100644 --- a/carl/envs/dmc/wrappers.py +++ b/carl/envs/dmc/wrappers.py @@ -1,8 +1,8 @@ -from typing import Tuple, Optional, Union, TypeVar +from typing import Optional, Tuple, TypeVar, Union -from dm_control.rl.control import Environment import gym import numpy as np +from dm_control.rl.control import Environment from dm_env import StepType from gym import spaces @@ -16,7 +16,9 @@ def __init__(self, env: Environment): self.env = env action_spec = self.env.action_spec() - self.action_space = spaces.Box(action_spec.minimum, action_spec.maximum, dtype=action_spec.dtype) + self.action_space = spaces.Box( + action_spec.minimum, action_spec.maximum, dtype=action_spec.dtype + ) obs_spec = self.env.observation_spec() # obs_spaces = { @@ -29,7 +31,9 @@ def __init__(self, env: 
Environment): lows = np.array([-np.inf] * shapes[0]) highs = np.array([np.inf] * shapes[0]) dtype = np.unique([[v.dtype for v in obs_spec.values()]])[0] - self.observation_space = spaces.Box(low=lows, high=highs, shape=shapes, dtype=dtype) + self.observation_space = spaces.Box( + low=lows, high=highs, shape=shapes, dtype=dtype + ) def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: """Run one timestep of the environment's dynamics. When end of @@ -56,10 +60,7 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: observation = timestep.observation["observations"] else: raise NotImplementedError - info = { - "step_type": step_type, - "discount": discount - } + info = {"step_type": step_type, "discount": discount} done = step_type == StepType.LAST return observation, reward, done, info @@ -70,7 +71,9 @@ def reset( return_info: bool = False, options: Optional[dict] = None, ) -> Union[ObsType, tuple[ObsType, dict]]: - super(MujocoToGymWrapper, self).reset(seed=seed, return_info=return_info, options=options) + super(MujocoToGymWrapper, self).reset( + seed=seed, return_info=return_info, options=options + ) timestep = self.env.reset() if isinstance(self.observation_space, spaces.Box): observation = timestep.observation["observations"] diff --git a/carl/envs/mario/carl_mario.py b/carl/envs/mario/carl_mario.py index 949a9d12..759a068c 100644 --- a/carl/envs/mario/carl_mario.py +++ b/carl/envs/mario/carl_mario.py @@ -2,6 +2,7 @@ import gym +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.envs.mario.carl_mario_definitions import ( DEFAULT_CONTEXT, @@ -11,7 +12,6 @@ from carl.envs.mario.mario_env import MarioEnv from carl.envs.mario.toad_gan import generate_level from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector class CARLMarioEnv(CARLEnv): @@ -28,7 +28,9 @@ def __init__( state_context_features: Optional[List[str]] = None, context_mask: Optional[List[str]] = None, dict_observation_space: bool = False, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, ): if not contexts: diff --git a/carl/envs/rna/carl_rna.py b/carl/envs/rna/carl_rna.py index e2f20caf..9bebb689 100644 --- a/carl/envs/rna/carl_rna.py +++ b/carl/envs/rna/carl_rna.py @@ -1,9 +1,11 @@ -from typing import Dict, Optional, Union, List +from typing import Dict, List, Optional, Union + +import os import gym import numpy as np -import os +from carl.context.selection import AbstractSelector from carl.envs.carl_env import CARLEnv from carl.envs.rna.carl_rna_definitions import ( ACTION_SPACE, @@ -16,7 +18,6 @@ RnaDesignEnvironmentConfig, ) from carl.utils.trial_logger import TrialLogger -from carl.context.selection import AbstractSelector class RnaGymWrapper(object): @@ -52,7 +53,9 @@ def __init__( logger: Optional[TrialLogger] = None, scale_context_features: str = "no", default_context: Optional[Dict] = DEFAULT_CONTEXT, - context_selector: Optional[Union[AbstractSelector, type(AbstractSelector)]] = None, + context_selector: Optional[ + Union[AbstractSelector, type(AbstractSelector)] + ] = None, context_selector_kwargs: Optional[Dict] = None, context_mask: Optional[List[str]] = None, ): diff --git a/carl/envs/rna/carl_rna_definitions.py b/carl/envs/rna/carl_rna_definitions.py index f5090aed..fbeb5e1f 100644 --- 
a/carl/envs/rna/carl_rna_definitions.py +++ b/carl/envs/rna/carl_rna_definitions.py @@ -23,15 +23,23 @@ "mutation_threshold": (0.1, np.inf, float), "reward_exponent": (0.1, np.inf, float), "state_radius": (1, np.inf, float), - "dataset": (None, None, "categorical", ["eterna", "rfam_taneda", "rfam_learn", None]), + "dataset": ( + None, + None, + "categorical", + ["eterna", "rfam_taneda", "rfam_learn", None], + ), "target_structure_ids": ( - None, None, "conditional", + None, + None, + "conditional", { "eterna": ID_LIST_ETERNA, "rfam_taneda": ID_LIST_RFAM_TANEDA, - "rfan_learn": ID_LIST_RFAM_LEARN, None: [None] + "rfan_learn": ID_LIST_RFAM_LEARN, + None: [None], }, - "dataset" + "dataset", ), # "solvers": { # "eterna": SOLVER_LIST_ETERNA, diff --git a/carl/utils/types.py b/carl/utils/types.py index fcb63e46..53ac9628 100644 --- a/carl/utils/types.py +++ b/carl/utils/types.py @@ -1,3 +1,3 @@ -from typing import Dict, Any +from typing import Any, Dict Context = Dict[str, Any] diff --git a/test/test_CARLEnv.py b/test/test_CARLEnv.py index c1a20747..960b910e 100644 --- a/test/test_CARLEnv.py +++ b/test/test_CARLEnv.py @@ -1,11 +1,11 @@ +from typing import Any, Dict + import unittest import numpy as np -from typing import Dict, Any - -from carl.utils.types import Context from carl.envs.classic_control.carl_pendulum import CARLPendulumEnv +from carl.utils.types import Context class TestStateConstruction(unittest.TestCase): @@ -240,7 +240,7 @@ def test_context_feature_scaling_by_default(self): "m": 1.0, "l": 1.0, "initial_angle_max": np.pi, - "initial_velocity_max": 1 + "initial_velocity_max": 1, } contexts = { "0": {"max_speed": 8.0, "dt": 0.03, "g": 10.0, "m": 1.0, "l": 1.8}, @@ -259,7 +259,9 @@ def test_context_feature_scaling_by_default(self): state, reward, done, info = env.step(action=action) n_c = len(default_context) scaled_contexts = state[-n_c:] - self.assertTrue(np.all(np.array([1.0, 0.6, 1, 1, 1.8, 1, 1]) == scaled_contexts)) + self.assertTrue( + np.all(np.array([1.0, 0.6, 1, 1, 1.8, 1, 1]) == scaled_contexts) + ) def test_context_feature_scaling_by_default_nodefcontext(self): with self.assertRaises(ValueError): @@ -318,7 +320,9 @@ def test_context_mask(self): ) s = env.reset() s_c = s["context"] - forbidden_in_context = [f for f in env.state_context_features if f in context_mask] + forbidden_in_context = [ + f for f in env.state_context_features if f in context_mask + ] self.assertTrue(len(s_c) == len(list(env.default_context.keys())) - 2) self.assertTrue(len(forbidden_in_context) == 0) diff --git a/test/test_selector.py b/test/test_selector.py index 36789fff..ae52c84d 100644 --- a/test/test_selector.py +++ b/test/test_selector.py @@ -1,9 +1,15 @@ +from typing import Any, Dict + import unittest from unittest.mock import patch -from typing import Dict, Any +from carl.context.selection import ( + AbstractSelector, + CustomSelector, + RandomSelector, + RoundRobinSelector, +) from carl.utils.types import Context -from carl.context.selection import RoundRobinSelector, RandomSelector, AbstractSelector, CustomSelector def dummy_select(dummy): @@ -19,7 +25,7 @@ def generate_contexts() -> Dict[Any, Context]: contexts = {k: v for k, v in zip(keys, values)} return contexts - @patch.object(AbstractSelector, '_select', dummy_select) + @patch.object(AbstractSelector, "_select", dummy_select) def test_abstract_selector(self): contexts = self.generate_contexts() selector = AbstractSelector(contexts=contexts) @@ -67,7 +73,9 @@ def selector_function(inst: AbstractSelector): return 
inst.contexts[inst.contexts_keys[context_id]], context_id contexts = self.generate_contexts() - selector = CustomSelector(contexts=contexts, selector_function=selector_function) + selector = CustomSelector( + contexts=contexts, selector_function=selector_function + ) selector.select() self.assertEqual(selector.context_id, 1)
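For reference, a minimal end-to-end usage sketch of the dm_control environments touched by this series, closely following carl/envs/dmc/try_dm_control.py as reformatted above. The rollout loop with a random policy is an illustrative addition and not part of the patch; the context key "actuator_strength" and the imported defaults/mask objects are taken from the Walker defaults shipped in the patch, and the render call assumes the rgb_array mode used in try_dm_control.py.

    # Usage sketch, assuming the carl package from this series plus dm_control are installed.
    import matplotlib.pyplot as plt

    from carl.envs import CARLDmcWalkerEnv
    from carl.envs import CARLDmcWalkerEnv_defaults as walker_default
    from carl.envs import CARLDmcWalkerEnv_mask as walker_mask

    # Build a single context that doubles the actuator strength relative to the defaults.
    stronger_act = walker_default.copy()
    stronger_act["actuator_strength"] = walker_default["actuator_strength"] * 2
    contexts = {0: stronger_act}

    env = CARLDmcWalkerEnv(
        task="stand_context",
        contexts=contexts,
        context_mask=walker_mask,
        hide_context=False,
        dict_observation_space=True,
    )

    # Roll out one episode with random actions; step() follows the gym API
    # (observation, reward, done, info) as implemented in MujocoToGymWrapper.
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)

    # Render a frame of the contextualized environment, as in try_dm_control.py.
    plt.imshow(env.render(mode="rgb_array"))
    plt.savefig("dm_render.png")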