From b8c72a53489c6d80196a1dc168835a2f375b868d Mon Sep 17 00:00:00 2001
From: Lucas Alegre
Date: Sun, 13 Dec 2020 13:15:45 -0300
Subject: [PATCH] Add SUMO-RL as example project in the docs (#257)

* Add SUMO-RL as example project in the docs

* Fixed docstring of AtariWrapper which was not inside of __init__

* Updated changelog regarding docs

* Fix docstring of classes in atari_wrappers.py which were inside the constructor

* Formatted docstring with black

Co-authored-by: Antonin RAFFIN
---
 docs/misc/changelog.rst                    |  4 +-
 docs/misc/projects.rst                     | 11 +++
 stable_baselines3/common/atari_wrappers.py | 89 ++++++++++++----------
 3 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index b90ce33a6..10ff71dc9 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -53,6 +53,8 @@ Documentation:
 - Fix ``clip_range`` docstring
 - Fix duplicated parameter in ``EvalCallback`` docstring (thanks @tfederico)
 - Added example of learning rate schedule
+- Added SUMO-RL as example project (@LucasAlegre)
+- Fix docstring of classes in atari_wrappers.py which were inside the constructor (@LucasAlegre)
 
 Pre-Release 0.10.0 (2020-10-28)
 -------------------------------
@@ -527,4 +529,4 @@ And all the contributors:
 @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta
 @rolandgvc @tkelestemur @mloo3 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
 @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
-@tfederico @hn2
+@tfederico @hn2 @LucasAlegre
diff --git a/docs/misc/projects.rst b/docs/misc/projects.rst
index 9a3b2f1ee..8424ce21a 100644
--- a/docs/misc/projects.rst
+++ b/docs/misc/projects.rst
@@ -37,3 +37,14 @@ It is an example of:
 
 | Author: Marios Koulakis
 | Github: https://github.com/koulakis/reacher-deep-reinforcement-learning
+
+SUMO-RL
+-------
+A simple interface to instantiate RL environments with SUMO for Traffic Signal Control.
+
+- Supports Multiagent RL
+- Compatibility with gym.Env and popular RL libraries such as stable-baselines3 and RLlib
+- Easy customisation: state and reward definitions are easily modifiable
+
+| Author: Lucas Alegre
+| Github: https://github.com/LucasAlegre/sumo-rl
\ No newline at end of file
diff --git a/stable_baselines3/common/atari_wrappers.py b/stable_baselines3/common/atari_wrappers.py
index b0c52959b..832ad9f23 100644
--- a/stable_baselines3/common/atari_wrappers.py
+++ b/stable_baselines3/common/atari_wrappers.py
@@ -13,14 +13,15 @@
 
 
 class NoopResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, noop_max: int = 30):
-        """
-        Sample initial states by taking random number of no-ops on reset.
-        No-op is assumed to be action 0.
+    """
+    Sample initial states by taking random number of no-ops on reset.
+    No-op is assumed to be action 0.
 
-        :param env: the environment to wrap
-        :param noop_max: the maximum value of no-ops to run
-        """
+    :param env: the environment to wrap
+    :param noop_max: the maximum value of no-ops to run
+    """
+
+    def __init__(self, env: gym.Env, noop_max: int = 30):
         gym.Wrapper.__init__(self, env)
         self.noop_max = noop_max
         self.override_num_noops = None
@@ -43,12 +44,13 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class FireResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Take action on reset for environments that are fixed until firing.
+    """
+    Take action on reset for environments that are fixed until firing.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         assert env.unwrapped.get_action_meanings()[1] == "FIRE"
         assert len(env.unwrapped.get_action_meanings()) >= 3
@@ -65,13 +67,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class EpisodicLifeEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Make end-of-life == end-of-episode, but only reset on true game over.
-        Done by DeepMind for the DQN and co. since it helps value estimation.
+    """
+    Make end-of-life == end-of-episode, but only reset on true game over.
+    Done by DeepMind for the DQN and co. since it helps value estimation.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         self.lives = 0
         self.was_real_done = True
@@ -109,13 +112,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, skip: int = 4):
-        """
-        Return only every ``skip``-th frame (frameskipping)
+    """
+    Return only every ``skip``-th frame (frameskipping)
 
-        :param env: the environment
-        :param skip: number of ``skip``-th frame
-        """
+    :param env: the environment
+    :param skip: number of ``skip``-th frame
+    """
+
+    def __init__(self, env: gym.Env, skip: int = 4):
         gym.Wrapper.__init__(self, env)
         # most recent raw observations (for max pooling across time steps)
         self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=env.observation_space.dtype)
@@ -151,12 +155,13 @@ def reset(self, **kwargs) -> GymObs:
 
 
 class ClipRewardEnv(gym.RewardWrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Clips the reward to {+1, 0, -1} by its sign.
+    """
+    Clips the reward to {+1, 0, -1} by its sign.
 
-        :param env: the environment
-        """
+    :param env: the environment
+    """
+
+    def __init__(self, env: gym.Env):
         gym.RewardWrapper.__init__(self, env)
 
     def reward(self, reward: float) -> float:
@@ -170,15 +175,16 @@ def reward(self, reward: float) -> float:
 
 
 class WarpFrame(gym.ObservationWrapper):
-    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
-        """
-        Convert to grayscale and warp frames to 84x84 (default)
-        as done in the Nature paper and later work.
+    """
+    Convert to grayscale and warp frames to 84x84 (default)
+    as done in the Nature paper and later work.
 
-        :param env: the environment
-        :param width:
-        :param height:
-        """
+    :param env: the environment
+    :param width:
+    :param height:
+    """
+
+    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
         gym.ObservationWrapper.__init__(self, env)
         self.width = width
         self.height = height
@@ -213,11 +219,10 @@ class AtariWrapper(gym.Wrapper):
     * Clip reward to {-1, 0, 1}
 
     :param env: gym environment
-    :param noop_max:: max number of no-ops
-    :param frame_skip:: the frequency at which the agent experiences the game.
-    :param screen_size:: resize Atari frame
-    :param terminal_on_life_loss:: if True, then step() returns done=True whenever a
-        life is lost.
+    :param noop_max: max number of no-ops
+    :param frame_skip: the frequency at which the agent experiences the game.
+    :param screen_size: resize Atari frame
+    :param terminal_on_life_loss: if True, then step() returns done=True whenever a life is lost.
     :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
     """
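
For reference, a minimal usage sketch of the AtariWrapper documented above, which applies the Atari preprocessing controlled by the parameters shown in its docstring. The environment id and the explicitly spelled-out default values are only illustrative, and the snippet assumes gym's Atari dependencies are installed; it uses the gym step/reset API current at the time of this patch.

    import gym

    from stable_baselines3.common.atari_wrappers import AtariWrapper

    # Illustrative Atari environment; any *NoFrameskip-v4 id works the same way.
    env = gym.make("PongNoFrameskip-v4")

    # Defaults mirror the docstring above: no-op reset, frame skip of 4,
    # 84x84 grayscale frames, terminal signal on life loss, clipped reward.
    env = AtariWrapper(
        env, noop_max=30, frame_skip=4, screen_size=84, terminal_on_life_loss=True, clip_reward=True
    )

    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())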