Add SUMO-RL as example project in the docs #257

Merged (6 commits) on Dec 13, 2020
4 changes: 3 additions & 1 deletion docs/misc/changelog.rst
@@ -53,6 +53,8 @@ Documentation:
 - Fix ``clip_range`` docstring
 - Fix duplicated parameter in ``EvalCallback`` docstring (thanks @tfederico)
 - Added example of learning rate schedule
+- Added SUMO-RL as example project (@LucasAlegre)
+- Fix docstring of classes in atari_wrappers.py which were inside the constructor (@LucasAlegre)
 
 Pre-Release 0.10.0 (2020-10-28)
 -------------------------------
@@ -527,4 +529,4 @@ And all the contributors:
 @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
 @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
-@tfederico @hn2
+@tfederico @hn2 @LucasAlegre
11 changes: 11 additions & 0 deletions docs/misc/projects.rst
@@ -37,3 +37,14 @@ It is an example of:
 
 | Author: Marios Koulakis
 | Github: https://github.com/koulakis/reacher-deep-reinforcement-learning
+
+SUMO-RL
+-------
+A simple interface to instantiate RL environments with SUMO for Traffic Signal Control.
+
+- Supports Multiagent RL
+- Compatibility with gym.Env and popular RL libraries such as stable-baselines3 and RLlib
+- Easy customisation: state and reward definitions are easily modifiable
+
+| Author: Lucas Alegre
+| Github: https://github.com/LucasAlegre/sumo-rl
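For a sense of how SUMO-RL plugs into stable-baselines3, here is a minimal single-agent training sketch. The SumoEnvironment keyword arguments and scenario file names are assumptions taken from the sumo-rl README, and a working SUMO installation is required.

from stable_baselines3 import DQN
from sumo_rl import SumoEnvironment  # import path as documented in the sumo-rl README

# The network/route files are placeholders; sumo-rl ships example
# single-intersection scenarios that can be used instead.
env = SumoEnvironment(
    net_file="intersection.net.xml",
    route_file="intersection.rou.xml",
    single_agent=True,  # expose a standard gym.Env rather than the multi-agent API
    num_seconds=20000,  # length of one episode, in simulation seconds
)

model = DQN("MlpPolicy", env, learning_rate=1e-3, verbose=1)
model.learn(total_timesteps=100000)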
89 changes: 47 additions & 42 deletions stable_baselines3/common/atari_wrappers.py
@@ -13,14 +13,15 @@
 
 
 class NoopResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, noop_max: int = 30):
-        """
-        Sample initial states by taking random number of no-ops on reset.
-        No-op is assumed to be action 0.
+    """
+    Sample initial states by taking random number of no-ops on reset.
+    No-op is assumed to be action 0.
 
-        :param env: the environment to wrap
-        :param noop_max: the maximum value of no-ops to run
-        """
+    :param env: the environment to wrap
+    :param noop_max: the maximum value of no-ops to run
+    """
+
+    def __init__(self, env: gym.Env, noop_max: int = 30):
         gym.Wrapper.__init__(self, env)
         self.noop_max = noop_max
         self.override_num_noops = None
@@ -43,12 +44,13 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class FireResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Take action on reset for environments that are fixed until firing.
+    """
+    Take action on reset for environments that are fixed until firing.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         assert env.unwrapped.get_action_meanings()[1] == "FIRE"
         assert len(env.unwrapped.get_action_meanings()) >= 3
@@ -65,13 +67,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class EpisodicLifeEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Make end-of-life == end-of-episode, but only reset on true game over.
-        Done by DeepMind for the DQN and co. since it helps value estimation.
+    """
+    Make end-of-life == end-of-episode, but only reset on true game over.
+    Done by DeepMind for the DQN and co. since it helps value estimation.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         self.lives = 0
         self.was_real_done = True
@@ -109,13 +112,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, skip: int = 4):
-        """
-        Return only every ``skip``-th frame (frameskipping)
+    """
+    Return only every ``skip``-th frame (frameskipping)
 
-        :param env: the environment
-        :param skip: number of ``skip``-th frame
-        """
+    :param env: the environment
+    :param skip: number of ``skip``-th frame
+    """
+
+    def __init__(self, env: gym.Env, skip: int = 4):
         gym.Wrapper.__init__(self, env)
         # most recent raw observations (for max pooling across time steps)
         self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=env.observation_space.dtype)
@@ -151,12 +155,13 @@ def reset(self, **kwargs) -> GymObs:
 
 
 class ClipRewardEnv(gym.RewardWrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Clips the reward to {+1, 0, -1} by its sign.
+    """
+    Clips the reward to {+1, 0, -1} by its sign.
 
-        :param env: the environment
-        """
+    :param env: the environment
+    """
+
+    def __init__(self, env: gym.Env):
         gym.RewardWrapper.__init__(self, env)
 
     def reward(self, reward: float) -> float:
@@ -170,15 +175,16 @@ def reward(self, reward: float) -> float:
 
 
 class WarpFrame(gym.ObservationWrapper):
-    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
-        """
-        Convert to grayscale and warp frames to 84x84 (default)
-        as done in the Nature paper and later work.
+    """
+    Convert to grayscale and warp frames to 84x84 (default)
+    as done in the Nature paper and later work.
 
-        :param env: the environment
-        :param width:
-        :param height:
-        """
+    :param env: the environment
+    :param width:
+    :param height:
+    """
+
+    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
         gym.ObservationWrapper.__init__(self, env)
         self.width = width
         self.height = height
@@ -213,11 +219,10 @@ class AtariWrapper(gym.Wrapper):
     * Clip reward to {-1, 0, 1}
 
     :param env: gym environment
-    :param noop_max:: max number of no-ops
-    :param frame_skip:: the frequency at which the agent experiences the game.
-    :param screen_size:: resize Atari frame
-    :param terminal_on_life_loss:: if True, then step() returns done=True whenever a
-        life is lost.
+    :param noop_max: max number of no-ops
+    :param frame_skip: the frequency at which the agent experiences the game.
+    :param screen_size: resize Atari frame
+    :param terminal_on_life_loss: if True, then step() returns done=True whenever a life is lost.
     :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
     """
 
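As a quick usage sketch, the combined wrapper can be applied to a raw Atari environment and passed to a stable-baselines3 model. The environment id and DQN settings below are only illustrative (the Atari ROMs/extras must be installed); the wrapper defaults mirror the docstring above.

import gym
from stable_baselines3 import DQN
from stable_baselines3.common.atari_wrappers import AtariWrapper

# AtariWrapper chains the individual wrappers defined in this module:
# noop reset, frame skip with max-pooling, life-loss termination,
# 84x84 grayscale warping and reward clipping.
env = AtariWrapper(gym.make("BreakoutNoFrameskip-v4"), terminal_on_life_loss=True, clip_reward=True)

model = DQN("CnnPolicy", env, buffer_size=50000, verbose=1)
model.learn(total_timesteps=100000)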