Add SUMO-RL as example project in the docs (#257)
* Add SUMO-RL as example project in the docs

* Fixed docstring of AtariWrapper which was not inside of __init__

* Updated changelog regarding docs

* Fix docstring of classes in atari_wrappers.py which were inside the constructor

* Formatted docstring with black

Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
LucasAlegre and araffin authored Dec 13, 2020
1 parent e63e9d7 commit b8c72a5
Showing 3 changed files with 61 additions and 43 deletions.
4 changes: 3 additions & 1 deletion docs/misc/changelog.rst
@@ -53,6 +53,8 @@ Documentation:
- Fix ``clip_range`` docstring
- Fix duplicated parameter in ``EvalCallback`` docstring (thanks @tfederico)
- Added example of learning rate schedule
+- Added SUMO-RL as example project (@LucasAlegre)
+- Fix docstring of classes in atari_wrappers.py which were inside the constructor (@LucasAlegre)

Pre-Release 0.10.0 (2020-10-28)
-------------------------------
@@ -527,4 +529,4 @@ And all the contributors:
@flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
@diditforlulz273 @liorcohen5 @ManifoldFR @mloo3 @SwamyDev @wmmc88 @megan-klaiber @thisray
-@tfederico @hn2
+@tfederico @hn2 @LucasAlegre
11 changes: 11 additions & 0 deletions docs/misc/projects.rst
@@ -37,3 +37,14 @@ It is an example of:

| Author: Marios Koulakis
| Github: https://github.com/koulakis/reacher-deep-reinforcement-learning
+
+SUMO-RL
+-------
+A simple interface to instantiate RL environments with SUMO for Traffic Signal Control.
+
+- Supports Multiagent RL
+- Compatibility with gym.Env and popular RL libraries such as stable-baselines3 and RLlib
+- Easy customisation: state and reward definitions are easily modifiable
+
+| Author: Lucas Alegre
+| Github: https://github.com/LucasAlegre/sumo-rl
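Since the entry advertises gym.Env compatibility and support for stable-baselines3, a minimal single-agent training sketch may help place it (illustrative only, not part of the commit; the SumoEnvironment arguments and file paths follow the SUMO-RL README and may differ between versions):

# Illustrative sketch, not part of this commit: training a stable-baselines3 DQN
# agent on a single-intersection SUMO-RL environment. The constructor arguments
# and file paths below are assumptions taken from the SUMO-RL README.
from stable_baselines3 import DQN
from sumo_rl import SumoEnvironment

env = SumoEnvironment(
    net_file="nets/single-intersection.net.xml",    # hypothetical SUMO network file
    route_file="nets/single-intersection.rou.xml",  # hypothetical route definition
    out_csv_name="outputs/single-intersection",     # per-episode metrics are written here
    use_gui=False,
    num_seconds=20000,
    single_agent=True,  # expose a plain gym.Env instead of the multi-agent API
)

model = DQN("MlpPolicy", env, learning_rate=1e-3, verbose=1)
model.learn(total_timesteps=20000)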
89 changes: 47 additions & 42 deletions stable_baselines3/common/atari_wrappers.py
@@ -13,14 +13,15 @@


class NoopResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, noop_max: int = 30):
-        """
-        Sample initial states by taking random number of no-ops on reset.
-        No-op is assumed to be action 0.
+    """
+    Sample initial states by taking random number of no-ops on reset.
+    No-op is assumed to be action 0.

-        :param env: the environment to wrap
-        :param noop_max: the maximum value of no-ops to run
-        """
+    :param env: the environment to wrap
+    :param noop_max: the maximum value of no-ops to run
+    """
+
+    def __init__(self, env: gym.Env, noop_max: int = 30):
        gym.Wrapper.__init__(self, env)
        self.noop_max = noop_max
        self.override_num_noops = None
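As a usage note (not part of the diff), each of these wrappers is applied by wrapping a raw Atari environment. A minimal sketch for NoopResetEnv:

# Sketch, not part of the diff: NoopResetEnv executes between 1 and noop_max
# random no-op actions on every reset, so episodes start from varied states.
import gym
from stable_baselines3.common.atari_wrappers import NoopResetEnv

env = NoopResetEnv(gym.make("PongNoFrameskip-v4"), noop_max=30)
obs = env.reset()  # the returned frame is already a random number of no-ops into the game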
@@ -43,12 +44,13 @@ def reset(self, **kwargs) -> np.ndarray:


class FireResetEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Take action on reset for environments that are fixed until firing.
+    """
+    Take action on reset for environments that are fixed until firing.

-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == "FIRE"
        assert len(env.unwrapped.get_action_meanings()) >= 3
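A short illustration (not part of the diff) of the precondition those asserts check and of what the wrapper does on reset:

# Sketch, not part of the diff: FireResetEnv is only meaningful for games whose
# second action is FIRE (e.g. Breakout); reset() presses FIRE so play can begin.
import gym
from stable_baselines3.common.atari_wrappers import FireResetEnv

raw = gym.make("BreakoutNoFrameskip-v4")
print(raw.unwrapped.get_action_meanings())  # ['NOOP', 'FIRE', 'RIGHT', 'LEFT']
env = FireResetEnv(raw)
obs = env.reset()  # FIRE has already been pressed, so the ball is in play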
@@ -65,13 +67,14 @@ def reset(self, **kwargs) -> np.ndarray:


class EpisodicLifeEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Make end-of-life == end-of-episode, but only reset on true game over.
-        Done by DeepMind for the DQN and co. since it helps value estimation.
+    """
+    Make end-of-life == end-of-episode, but only reset on true game over.
+    Done by DeepMind for the DQN and co. since it helps value estimation.

-        :param env: the environment to wrap
-        """
+    :param env: the environment to wrap
+    """
+
+    def __init__(self, env: gym.Env):
        gym.Wrapper.__init__(self, env)
        self.lives = 0
        self.was_real_done = True
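An illustration (not part of the diff) of the end-of-life behaviour described in the docstring:

# Sketch, not part of the diff: with EpisodicLifeEnv, losing a single life ends
# the training episode, while was_real_done only becomes True on real game over.
import gym
from stable_baselines3.common.atari_wrappers import EpisodicLifeEnv

env = EpisodicLifeEnv(gym.make("BreakoutNoFrameskip-v4"))
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print(env.was_real_done)  # False if only a life was lost; reset() then continues the same game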
@@ -109,13 +112,14 @@ def reset(self, **kwargs) -> np.ndarray:


class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env: gym.Env, skip: int = 4):
-        """
-        Return only every ``skip``-th frame (frameskipping)
+    """
+    Return only every ``skip``-th frame (frameskipping)

-        :param env: the environment
-        :param skip: number of ``skip``-th frame
-        """
+    :param env: the environment
+    :param skip: number of ``skip``-th frame
+    """
+
+    def __init__(self, env: gym.Env, skip: int = 4):
        gym.Wrapper.__init__(self, env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=env.observation_space.dtype)
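To make the max-pooling over the two most recent frames concrete (illustration, not part of the diff):

# Sketch, not part of the diff: MaxAndSkipEnv repeats each action `skip` times
# and returns the element-wise maximum of the last two raw frames, which hides
# the flickering of sprites that are only drawn on alternate frames.
import numpy as np

frame_a = np.zeros((84, 84), dtype=np.uint8)
frame_b = np.zeros((84, 84), dtype=np.uint8)
frame_a[10, 10] = 255  # sprite visible only in frame A
frame_b[20, 20] = 255  # sprite visible only in frame B
pooled = np.maximum(frame_a, frame_b)
print(pooled[10, 10], pooled[20, 20])  # 255 255 -- both sprites survive the pooling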
@@ -151,12 +155,13 @@ def reset(self, **kwargs) -> GymObs:


class ClipRewardEnv(gym.RewardWrapper):
-    def __init__(self, env: gym.Env):
-        """
-        Clips the reward to {+1, 0, -1} by its sign.
+    """
+    Clips the reward to {+1, 0, -1} by its sign.

-        :param env: the environment
-        """
+    :param env: the environment
+    """
+
+    def __init__(self, env: gym.Env):
        gym.RewardWrapper.__init__(self, env)

    def reward(self, reward: float) -> float:
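The clipping described in the docstring amounts to taking the sign of the reward (illustration, not part of the diff):

# Sketch, not part of the diff: clipping a reward to {+1, 0, -1} by its sign.
import numpy as np

for r in (12.5, -0.3, 0.0):
    print(np.sign(r))  # prints 1.0, -1.0, 0.0 on successive lines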
@@ -170,15 +175,16 @@ def reward(self, reward: float) -> float:


class WarpFrame(gym.ObservationWrapper):
-    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
-        """
-        Convert to grayscale and warp frames to 84x84 (default)
-        as done in the Nature paper and later work.
+    """
+    Convert to grayscale and warp frames to 84x84 (default)
+    as done in the Nature paper and later work.

-        :param env: the environment
-        :param width:
-        :param height:
-        """
+    :param env: the environment
+    :param width:
+    :param height:
+    """
+
+    def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
        gym.ObservationWrapper.__init__(self, env)
        self.width = width
        self.height = height
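The observation transform the docstring describes, spelled out with OpenCV calls (illustration, not part of the diff):

# Sketch, not part of the diff: grayscale conversion and resizing of a raw
# 210x160 RGB Atari frame to a single-channel 84x84 observation.
import cv2
import numpy as np

frame = np.random.randint(0, 256, size=(210, 160, 3), dtype=np.uint8)  # stand-in for a raw frame
gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
small = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
obs = small[:, :, None]  # shape (84, 84, 1), matching the wrapper's observation space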
@@ -213,11 +219,10 @@ class AtariWrapper(gym.Wrapper):
    * Clip reward to {-1, 0, 1}

    :param env: gym environment
-    :param noop_max:: max number of no-ops
-    :param frame_skip:: the frequency at which the agent experiences the game.
-    :param screen_size:: resize Atari frame
-    :param terminal_on_life_loss:: if True, then step() returns done=True whenever a
-        life is lost.
+    :param noop_max: max number of no-ops
+    :param frame_skip: the frequency at which the agent experiences the game.
+    :param screen_size: resize Atari frame
+    :param terminal_on_life_loss: if True, then step() returns done=True whenever a life is lost.
    :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
    """

