From b8c72a53489c6d80196a1dc168835a2f375b868d Mon Sep 17 00:00:00 2001
From: Lucas Alegre
Date: Sun, 13 Dec 2020 13:15:45 -0300
Subject: [PATCH] Add SUMO-RL as example project in the docs (#257)

* Add SUMO-RL as example project in the docs

* Fixed docstring of AtariWrapper which was not inside of __init__

* Updated changelog regarding docs

* Fix docstring of classes in atari_wrappers.py which were inside the constructor

* Formatted docstring with black

Co-authored-by: Antonin RAFFIN
---
 docs/misc/changelog.rst                    |  4 +-
 docs/misc/projects.rst                     | 11 +++
 stable_baselines3/common/atari_wrappers.py | 89 ++++++++++++----------
 3 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index b90ce33a6..10ff71dc9 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -53,6 +53,8 @@ Documentation:
 - Fix ``clip_range`` docstring
 - Fix duplicated parameter in ``EvalCallback`` docstring (thanks @tfederico)
 - Added example of learning rate schedule
+- Added SUMO-RL as example project (@LucasAlegre)
+- Fix docstring of classes in atari_wrappers.py which were inside the constructor (@LucasAlegre)
 
 Pre-Release 0.10.0 (2020-10-28)
 -------------------------------
@@ -527,4 +529,4 @@ And all the contributors:
 @flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta
 @rolandgvc @tkelestemur @mloo3 @tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
 @diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
-@tfederico @hn2
+@tfederico @hn2 @LucasAlegre
diff --git a/docs/misc/projects.rst b/docs/misc/projects.rst
index 9a3b2f1ee..8424ce21a 100644
--- a/docs/misc/projects.rst
+++ b/docs/misc/projects.rst
@@ -37,3 +37,14 @@ It is an example of:
 
 | Author: Marios Koulakis
 | Github: https://github.com/koulakis/reacher-deep-reinforcement-learning
+
+SUMO-RL
+-------
+A simple interface to instantiate RL environments with SUMO for Traffic Signal Control.
+
+- Supports Multiagent RL
+- Compatibility with gym.Env and popular RL libraries such as stable-baselines3 and RLlib
+- Easy customisation: state and reward definitions are easily modifiable
+
+| Author: Lucas Alegre
+| Github: https://github.com/LucasAlegre/sumo-rl
\ No newline at end of file
diff --git a/stable_baselines3/common/atari_wrappers.py b/stable_baselines3/common/atari_wrappers.py
index b0c52959b..832ad9f23 100644
--- a/stable_baselines3/common/atari_wrappers.py
+++ b/stable_baselines3/common/atari_wrappers.py
@@ -13,14 +13,15 @@
 
 
 class NoopResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, noop_max: int = 30):
-        """
-        Sample initial states by taking random number of no-ops on reset.
-        No-op is assumed to be action 0.
+    """
+    Sample initial states by taking random number of no-ops on reset.
+    No-op is assumed to be action 0.
 
-        :param env: the environment to wrap
-        :param noop_max: the maximum value of no-ops to run
-        """
+    :param env: the environment to wrap
+    :param noop_max: the maximum value of no-ops to run
+    """
+
+    def __init__(self, env: gym.Env, noop_max: int = 30):
         gym.Wrapper.__init__(self, env)
         self.noop_max = noop_max
         self.override_num_noops = None
@@ -43,12 +44,13 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class FireResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Take action on reset for environments that are fixed until firing.
+    """
+    Take action on reset for environments that are fixed until firing.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         assert env.unwrapped.get_action_meanings()[1] == "FIRE"
         assert len(env.unwrapped.get_action_meanings()) >= 3
@@ -65,13 +67,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class EpisodicLifeEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Make end-of-life == end-of-episode, but only reset on true game over.
-        Done by DeepMind for the DQN and co. since it helps value estimation.
+    """
+    Make end-of-life == end-of-episode, but only reset on true game over.
+    Done by DeepMind for the DQN and co. since it helps value estimation.
 
-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
         gym.Wrapper.__init__(self, env)
         self.lives = 0
         self.was_real_done = True
@@ -109,13 +112,14 @@ def reset(self, **kwargs) -> np.ndarray:
 
 
 class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, skip: int = 4):
-        """
-        Return only every ``skip``-th frame (frameskipping)
+    """
+    Return only every ``skip``-th frame (frameskipping)
 
-        :param env: the environment
-        :param skip: number of ``skip``-th frame
-        """
+    :param env: the environment
+    :param skip: number of ``skip``-th frame
+    """
+
+    def __init__(self, env: gym.Env, skip: int = 4):
         gym.Wrapper.__init__(self, env)
         # most recent raw observations (for max pooling across time steps)
         self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=env.observation_space.dtype)
@@ -151,12 +155,13 @@ def reset(self, **kwargs) -> GymObs:
 
 
 class ClipRewardEnv(gym.RewardWrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Clips the reward to {+1, 0, -1} by its sign.
+    """
+    Clips the reward to {+1, 0, -1} by its sign.
 
-        :param env: the environment
-        """
+    :param env: the environment
+    """
+
+    def __init__(self, env: gym.Env):
         gym.RewardWrapper.__init__(self, env)
 
     def reward(self, reward: float) -> float:
@@ -170,15 +175,16 @@ def reward(self, reward: float) -> float:
 
 
 class WarpFrame(gym.ObservationWrapper):
-    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
-        """
-        Convert to grayscale and warp frames to 84x84 (default)
-        as done in the Nature paper and later work.
+    """
+    Convert to grayscale and warp frames to 84x84 (default)
+    as done in the Nature paper and later work.
 
-        :param env: the environment
-        :param width:
-        :param height:
-        """
+    :param env: the environment
+    :param width:
+    :param height:
+    """
+
+    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
         gym.ObservationWrapper.__init__(self, env)
         self.width = width
         self.height = height
@@ -213,11 +219,10 @@ class AtariWrapper(gym.Wrapper):
     * Clip reward to {-1, 0, 1}
 
     :param env: gym environment
-    :param noop_max:: max number of no-ops
-    :param frame_skip:: the frequency at which the agent experiences the game.
-    :param screen_size:: resize Atari frame
-    :param terminal_on_life_loss:: if True, then step() returns done=True whenever a
-        life is lost.
+    :param noop_max: max number of no-ops
+    :param frame_skip: the frequency at which the agent experiences the game.
+    :param screen_size: resize Atari frame
+    :param terminal_on_life_loss: if True, then step() returns done=True whenever a life is lost.
     :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
     """
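
For reference, a minimal usage sketch of the AtariWrapper documented above, which applies the Atari preprocessing controlled by the parameters shown in its docstring. The environment id and the explicitly spelled-out default values are only illustrative, and the snippet assumes gym's Atari dependencies are installed; it uses the gym step/reset API current at the time of this patch.

    import gym

    from stable_baselines3.common.atari_wrappers import AtariWrapper

    # Illustrative Atari environment; any *NoFrameskip-v4 id works the same way.
    env = gym.make("PongNoFrameskip-v4")

    # Defaults mirror the docstring above: no-op reset, frame skip of 4,
    # 84x84 grayscale frames, terminal signal on life loss, clipped reward.
    env = AtariWrapper(
        env, noop_max=30, frame_skip=4, screen_size=84, terminal_on_life_loss=True, clip_reward=True
    )

    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())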