diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 39d39a9b7..3d287f105 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -1,5 +1,7 @@ +"""Contains all callable environments in Flow.""" + from flow.envs.base_env import Env -from flow.envs.bay_bridge import BayBridgeEnv +from flow.envs.bay_bridge.base import BayBridgeEnv from flow.envs.bottleneck_env import BottleNeckAccelEnv, BottleneckEnv, \ DesiredVelocityEnv from flow.envs.green_wave_env import TrafficLightGridEnv, \ diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index 445f2b1e7..605aed2b1 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -1,3 +1,5 @@ +"""Base environment class. This is the parent of all other environments.""" + import logging import os import signal @@ -35,35 +37,36 @@ class Env(gym.Env, Serializable): + """Base environment class. + + Provides the interface for controlling a SUMO simulation. Using this + class, you can start sumo, provide a scenario to specify a + configuration and controllers, perform simulation steps, and reset the + simulation to an initial configuration. + + Env is Serializable to allow for pickling and replaying of the policy. + + This class cannot be used as is: you must extend it to implement an + action applicator method, and properties to define the MDP if you + choose to use it with an rl library (e.g. RLlib). This can be done by + overloading the following functions in a child class: + - action_space + - observation_space + - apply_rl_action + - get_state + - compute_reward + + Attributes + ---------- + env_params: EnvParams type: + see flow/core/params.py + sumo_params: SumoParams type + see flow/core/params.py + scenario: Scenario type + see flow/scenarios/base_scenario.py + """ + def __init__(self, env_params, sumo_params, scenario): - """Base environment class. - - Provides the interface for controlling a SUMO simulation. Using this - class, you can start sumo, provide a scenario to specify a - configuration and controllers, perform simulation steps, and reset the - simulation to an initial configuration. - - Env is Serializable to allow for pickling and replaying of the policy. - - This class cannot be used as is: you must extend it to implement an - action applicator method, and properties to define the MDP if you - choose to use it with an rl library (e.g. RLlib). This can be done by - overloading the following functions in a child class: - - action_space - - observation_space - - apply_rl_action - - get_state - - compute_reward - - Attributes - ---------- - env_params: EnvParams type: - see flow/core/params.py - sumo_params: SumoParams type - see flow/core/params.py - scenario: Scenario type - see flow/scenarios/base_scenario.py - """ # Invoke serializable if using rllab if Serializable is not object: Serializable.quick_init(self, locals()) @@ -128,7 +131,7 @@ def __init__(self, env_params, sumo_params, scenario): self.setup_initial_state() def restart_sumo(self, sumo_params, sumo_binary=None): - """Restarts an already initialized sumo instance. + """Restart an already initialized sumo instance. This is used when visualizing a rollout, in order to update the sumo_binary with potentially a gui and export emission data from sumo. @@ -157,7 +160,7 @@ def restart_sumo(self, sumo_params, sumo_binary=None): self.setup_initial_state() def start_sumo(self): - """Starts a sumo instance. + """Start a sumo instance. Uses the configuration files created by the generator class to initialize a sumo instance. 
Also initializes a traci connection to @@ -254,11 +257,11 @@ def start_sumo(self): raise error def setup_initial_state(self): - """Returns information on the initial state of the vehicles in the - network, to be used upon reset. + """Return information on the initial state of vehicles in the network. - Also adds initial state information to the self.vehicles class and - starts a subscription with sumo to collect state information each step. + This information is to be used upon reset. This method also adds this + information to the self.vehicles class and starts a subscription with + sumo to collect state information each step. Returns ------- @@ -348,7 +351,7 @@ def setup_initial_state(self): self.vehicles.update(vehicle_obs, id_lists, self) def step(self, rl_actions): - """Advances the environment by one step. + """Advance the environment by one step. Assigns actions to autonomous and human-driven agents (i.e. vehicles, traffic lights, etc...). Actions that are not assigned are left to the @@ -460,7 +463,7 @@ def step(self, rl_actions): return next_observation, reward, crash, {} def reset(self): - """Resets the environment. + """Reset the environment. This method is performed in between rollouts. It resets the state of the environment, and re-initializes the vehicles in their starting @@ -619,7 +622,7 @@ def additional_command(self): pass def apply_rl_actions(self, rl_actions=None): - """Specifies the actions to be performed by the rl agent(s). + """Specify the actions to be performed by the rl agent(s). If no actions are provided at any given step, the rl agents default to performing actions specified by sumo. @@ -646,7 +649,7 @@ def _apply_rl_actions(self, rl_actions): raise NotImplementedError def apply_acceleration(self, veh_ids, acc): - """Applies the acceleration requested by a vehicle in sumo. + """Apply the acceleration requested by a vehicle in sumo. Note that, if the sumo-specified speed mode of the vehicle is not "aggressive", the acceleration may be clipped by some safety velocity @@ -666,8 +669,12 @@ def apply_acceleration(self, veh_ids, acc): self.traci_connection.vehicle.slowDown(vid, next_vel, 1) def apply_lane_change(self, veh_ids, direction): - """Applies an instantaneous lane-change to a set of vehicles, while - preventing vehicles from moving to lanes that do not exist. + """Apply an instantaneous lane-change to a set of vehicles. + + This method also prevents vehicles from moving to lanes that do not + exist, and sets the "last_lc" variable for RL vehicles that changed + lanes to match the current time step, in order to assist in maintaining + a lane change duration for these vehicles. Parameters ---------- @@ -711,7 +718,7 @@ def apply_lane_change(self, veh_ids, direction): self.vehicles.get_state(veh_id, "last_lc") def choose_routes(self, veh_ids, route_choices): - """Updates the route choice of vehicles in the network. + """Update the route choice of vehicles in the network. Parameters ---------- @@ -728,8 +735,11 @@ def choose_routes(self, veh_ids, route_choices): vehID=veh_id, edgeList=route_choices[i]) def get_x_by_id(self, veh_id): - """Provides a 1-dimensional representation of the position of a vehicle - in the network. + """Provide a 1-D representation of the position of a vehicle. + + Note: These values are only meaningful if the specify_edge_starts + method in the scenario is set appropriately; otherwise, a value of 0 is + returned for all vehicles.
Parameters ---------- @@ -748,7 +758,7 @@ def get_x_by_id(self, veh_id): self.vehicles.get_edge(veh_id), self.vehicles.get_position(veh_id)) def sort_by_position(self): - """Sorts the vehicle ids of vehicles in the network by position. + """Sort the vehicle ids of vehicles in the network by position. The base environment does this by sorting vehicles by their absolute position. @@ -771,7 +781,7 @@ def sort_by_position(self): return self.vehicles.get_ids(), None def update_vehicle_colors(self): - """Modifies the color of vehicles if rendering is active. + """Modify the color of vehicles if rendering is active. The colors of all vehicles are updated as follows: - red: autonomous (rl) vehicles @@ -809,7 +819,7 @@ def update_vehicle_colors(self): self.vehicles.remove_observed(veh_id) def get_state(self): - """Returns the state of the simulation as perceived by the RL agent. + """Return the state of the simulation as perceived by the RL agent. MUST BE implemented in new environments. @@ -823,8 +833,7 @@ def get_state(self): @property def action_space(self): - """Identifies the dimensions and bounds of the action space (needed for - gym environments). + """Identify the dimensions and bounds of the action space. MUST BE implemented in new environments. @@ -837,8 +846,7 @@ def action_space(self): @property def observation_space(self): - """Identifies the dimensions and bounds of the observation space - (needed for gym environments). + """Identify the dimensions and bounds of the observation space. MUST BE implemented in new environments. @@ -873,7 +881,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return 0 def terminate(self): - """Closes the TraCI I/O connection. + """Close the TraCI I/O connection. Should be done at end of every experiment. Must be in Env because the environment opens the TraCI connection. @@ -885,6 +893,7 @@ def _close(self): self.scenario.close() def teardown_sumo(self): + """Kill the sumo subprocess instance.""" try: os.killpg(self.sumo_proc.pid, signal.SIGTERM) except Exception: @@ -894,4 +903,5 @@ def _seed(self, seed=None): return [] def render(self, mode='human'): + """See parent class (gym.Env).""" pass diff --git a/flow/envs/bay_bridge/__init__.py b/flow/envs/bay_bridge/__init__.py deleted file mode 100644 index d5802d7e0..000000000 --- a/flow/envs/bay_bridge/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from flow.envs.bay_bridge.base import BayBridgeEnv - -__all__ = ["BayBridgeEnv"] diff --git a/flow/envs/bay_bridge/base.py b/flow/envs/bay_bridge/base.py index 8cb188d8f..411f0bfe4 100644 --- a/flow/envs/bay_bridge/base.py +++ b/flow/envs/bay_bridge/base.py @@ -209,12 +209,15 @@ def apply_toll_bridge_control(self): # TODO: decide on a good reward function def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) """ The below methods need to be updated by child classes. """ def _apply_rl_actions(self, rl_actions): + """Implemented by child classes.""" pass def get_state(self): + """Implemented by child classes.""" return [] diff --git a/flow/envs/bottleneck_env.py b/flow/envs/bottleneck_env.py index bf8befdef..10fe5fd08 100644 --- a/flow/envs/bottleneck_env.py +++ b/flow/envs/bottleneck_env.py @@ -1,3 +1,10 @@ +""" +Environments for training vehicles to reduce capacity drops in a bottleneck. + +This environment was used in: +TODO(ak): add paper after it has been published. 
+""" + from flow.controllers.rlcontroller import RLController from flow.controllers.lane_change_controllers import SumoLaneChangeController from flow.controllers.routing_controllers import ContinuousRouter @@ -347,6 +354,7 @@ def get_avg_bottleneck_velocity(self): # Dummy action and observation spaces @property def action_space(self): + """See class definition.""" return Box( low=-float("inf"), high=float("inf"), @@ -355,6 +363,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" return Box( low=-float("inf"), high=float("inf"), @@ -369,6 +378,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return reward def get_state(self): + """See class definition.""" return np.asarray([1]) @@ -411,6 +421,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" num_edges = len(self.scenario.get_edge_list()) num_rl_veh = self.num_rl num_obs = 2 * num_edges + 4 * MAX_LANES * self.scaling \ @@ -419,6 +430,7 @@ def observation_space(self): return Box(low=0, high=1, shape=(num_obs, ), dtype=np.float32) def get_state(self): + """See class definition.""" headway_scale = 1000 rl_ids = self.vehicles.get_rl_ids() @@ -521,6 +533,7 @@ def get_state(self): return np.concatenate((rl_obs, relative_obs, edge_obs)) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" num_rl = self.vehicles.num_rl_vehicles lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl]) return (rewards.desired_velocity(self) + rewards.rl_forward_progress( @@ -536,7 +549,7 @@ def sort_by_position(self): def _apply_rl_actions(self, actions): """ - See parent class + See parent class. Takes a tuple and applies a lane change or acceleration. if a lane change is applied, don't issue any commands @@ -711,6 +724,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" num_obs = 0 # density and velocity for rl and non-rl vehicles per segment # Last element is the outflow @@ -721,6 +735,7 @@ def observation_space(self): @property def action_space(self): + """See class definition.""" if self.symmetric: action_size = self.total_controlled_segments else: @@ -733,6 +748,7 @@ def action_space(self): low=-1.5, high=1.0, shape=(int(action_size), ), dtype=np.float32) def get_state(self): + """See class definition.""" # action space is number of vehicles in each segment in each lane, # number of rl vehicles in each segment in each lane # mean speed in each segment, and mean rl speed in each @@ -829,7 +845,7 @@ def _apply_rl_actions(self, rl_actions): self.traci_connection.vehicle.setMaxSpeed(rl_id, 23.0) def compute_reward(self, state, rl_actions, **kwargs): - """ Outflow rate over last ten seconds normalized to max of 1 """ + """Outflow rate over last ten seconds normalized to max of 1.""" if self.env_params.evaluate: reward = self.vehicles.get_outflow_rate(500) diff --git a/flow/envs/green_wave_env.py b/flow/envs/green_wave_env.py index 5cdcf3eef..1727590c9 100644 --- a/flow/envs/green_wave_env.py +++ b/flow/envs/green_wave_env.py @@ -104,6 +104,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" if self.discrete: return Discrete(2 ** self.num_traffic_lights) else: @@ -115,6 +116,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=1, @@ -138,6 +140,7 @@ def observation_space(self): 
return Tuple((speed, dist_to_intersec, edge_num, traffic_lights)) def get_state(self): + """See class definition.""" # compute the normalizers max_dist = max(self.scenario.short_length, self.scenario.long_length, self.scenario.inner_length) @@ -164,6 +167,7 @@ def get_state(self): return np.array(state) def _apply_rl_actions(self, rl_actions): + """See class definition.""" # check if the action space is discrete if self.discrete: # convert single value to list of 0's and 1's @@ -208,6 +212,7 @@ def _apply_rl_actions(self, rl_actions): self.last_change[i, 2] = 0 def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return rewards.penalize_tl_changes(rl_actions >= 0.5, gain=1.0) # =============================== @@ -468,8 +473,10 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): """ - Partial observed state space. Velocities, distance to intersections, - edge number (for nearby vehicles) traffic light state + Partially observed state space. + + Velocities, distance to intersections, edge number (for nearby + vehicles), and traffic light state. """ tl_box = Box( low=0., @@ -547,12 +554,14 @@ def get_state(self): ])) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return rewards.min_delay_unscaled(self) else: return rewards.desired_velocity(self, fail=kwargs["fail"]) def additional_command(self): + """See class definition.""" # specify observed vehicles [self.vehicles.set_observed(veh_id) for veh_id in self.observed_ids] @@ -564,7 +573,9 @@ class GreenWaveTestEnv(TrafficLightGridEnv): """ def _apply_rl_actions(self, rl_actions): + """See class definition.""" pass def compute_reward(self, state, rl_actions, **kwargs): + """No return, for testing purposes.""" return 0 diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index f6f07efa1..55dce714d 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -1,3 +1,5 @@ +"""Environments that can train both lane change and acceleration behaviors.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -20,8 +22,11 @@ class LaneChangeAccelEnv(Env): - """Environment used to train autonomous vehicles to improve traffic flows - when lane-change and acceleration actions are permitted by the rl agent. + """Fully observable lane change and acceleration environment. + + This environment is used to train autonomous vehicles to improve traffic + flows when lane-change and acceleration actions are permitted by the rl + agent. 
Required from env_params: @@ -66,6 +71,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" max_decel = self.env_params.additional_params["max_decel"] max_accel = self.env_params.additional_params["max_accel"] @@ -76,6 +82,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=1, @@ -94,6 +101,7 @@ def observation_space(self): return Tuple((speed, pos, lane)) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" # compute the system-level performance of vehicles from a velocity # perspective reward = rewards.desired_velocity(self, fail=kwargs["fail"]) @@ -107,6 +115,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return reward def get_state(self): + """See class definition.""" # normalizers max_speed = self.scenario.max_speed length = self.scenario.length @@ -121,6 +130,7 @@ def get_state(self): ] for veh_id in self.sorted_ids]) def _apply_rl_actions(self, actions): + """See class definition.""" acceleration = actions[::2] direction = actions[1::2] @@ -144,6 +154,7 @@ def _apply_rl_actions(self, actions): self.apply_lane_change(sorted_rl_ids, direction=direction) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): @@ -189,6 +200,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" return Box( low=0, high=1, @@ -197,6 +209,7 @@ def observation_space(self): dtype=np.float32) def get_state(self): + """See class definition.""" obs = [ 0 for _ in range(4 * self.vehicles.num_rl_vehicles * self.num_lanes) @@ -247,6 +260,7 @@ def get_state(self): return np.array(obs) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles for veh_id in self.visible: self.vehicles.set_observed(veh_id) diff --git a/flow/envs/loop/loop_accel.py b/flow/envs/loop/loop_accel.py index bfd3cf12b..42810f6e3 100755 --- a/flow/envs/loop/loop_accel.py +++ b/flow/envs/loop/loop_accel.py @@ -1,3 +1,5 @@ +"""Environment for training the acceleration behavior of vehicles in a loop.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -17,7 +19,9 @@ class AccelEnv(Env): - """Environment used to train autonomous vehicles to improve traffic flows + """Fully observed acceleration environment. + + This environment is used to train autonomous vehicles to improve traffic flows when acceleration actions are permitted by the rl agent.
Required from env_params: @@ -54,6 +58,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -62,6 +67,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" self.obs_var_labels = ["Velocity", "Absolute_pos"] speed = Box( low=0, @@ -76,6 +82,7 @@ def observation_space(self): return Tuple((speed, pos)) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -83,12 +90,14 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) else: return rewards.desired_velocity(self, fail=kwargs["fail"]) def get_state(self, **kwargs): + """See class definition.""" # speed normalizer max_speed = self.scenario.max_speed @@ -98,6 +107,7 @@ def get_state(self, **kwargs): ] for veh_id in self.sorted_ids]) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): diff --git a/flow/envs/loop/loop_merges.py b/flow/envs/loop/loop_merges.py index 658be2622..a88f75770 100755 --- a/flow/envs/loop/loop_merges.py +++ b/flow/envs/loop/loop_merges.py @@ -1,3 +1,5 @@ +"""Environment for training cooperative merging behaviors in a loop merge.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -23,8 +25,7 @@ class TwoLoopsMergePOEnv(Env): - """Environment for training cooperative merging behavior in a partially - observable closed loop merge scenario. + """Environment for training cooperative merging behaviors in a loop merge. WARNING: only supports 1 RL vehicle @@ -81,6 +82,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=np.inf, @@ -99,6 +101,7 @@ def observation_space(self): @property def action_space(self): + """See class definition.""" return Box( low=-np.abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -106,6 +109,7 @@ def action_space(self): dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -113,6 +117,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" vel_reward = rewards.desired_velocity(self, fail=kwargs["fail"]) # Use a similar weighting of of the headway reward as the velocity @@ -127,6 +132,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return vel_reward + headway_reward def get_state(self, **kwargs): + """See class definition.""" vel = np.zeros(self.n_obs_vehicles) pos = np.zeros(self.n_obs_vehicles) @@ -206,7 +212,7 @@ def get_state(self, **kwargs): def sort_by_position(self): """ - See parent class + See parent class. 
Instead of being sorted by a global reference, vehicles in this environment are sorted with regards to which ring this currently diff --git a/flow/envs/loop/wave_attenuation.py b/flow/envs/loop/wave_attenuation.py index 5f44fa685..8be89b693 100644 --- a/flow/envs/loop/wave_attenuation.py +++ b/flow/envs/loop/wave_attenuation.py @@ -1,3 +1,13 @@ +""" +Environment used to train a stop-and-go dissipating controller. + +This is the environment that was used in: + +C. Wu, A. Kreidieh, K. Parvate, E. Vinitsky, A. Bayen, "Flow: Architecture and +Benchmarking for Reinforcement Learning in Traffic Control," CoRR, vol. +abs/1710.05465, 2017. [Online]. Available: https://arxiv.org/abs/1710.05465 +""" + from flow.envs.base_env import Env from flow.core.params import InitialConfig, NetParams @@ -20,8 +30,10 @@ class WaveAttenuationEnv(Env): - """Environment used to train autonomous vehicles to attenuate the formation - and propagation of waves in a variable density ring road. + """Fully observable wave attenuation environment. + + This environment is used to train autonomous vehicles to attenuate the + formation and propagation of waves in a variable density ring road. Required from env_params: @@ -57,6 +69,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-np.abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -65,6 +78,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" self.obs_var_labels = ["Velocity", "Absolute_pos"] speed = Box( low=0, @@ -79,6 +93,7 @@ def observation_space(self): return Tuple((speed, pos)) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -86,6 +101,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" # in the warmup steps if rl_actions is None: return 0 @@ -113,20 +129,25 @@ def compute_reward(self, state, rl_actions, **kwargs): return float(reward) def get_state(self, **kwargs): + """See class definition.""" return np.array([[ self.vehicles.get_speed(veh_id) / self.scenario.max_speed, self.get_x_by_id(veh_id) / self.scenario.length ] for veh_id in self.sorted_ids]) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): self.vehicles.set_observed(veh_id) def reset(self): - """The sumo instance is reset with a new ring length, and a number of - steps are performed with the rl vehicle acting as a human vehicle.""" + """See parent class. + + The sumo instance is reset with a new ring length, and a number of + steps are performed with the rl vehicle acting as a human vehicle. 
+ """ # update the scenario initial_config = InitialConfig(bunching=50, min_gap=0) additional_net_params = { @@ -216,9 +237,11 @@ class WaveAttenuationPOEnv(WaveAttenuationEnv): @property def observation_space(self): + """See class definition.""" return Box(low=0, high=1, shape=(3, ), dtype=np.float32) def get_state(self, **kwargs): + """See class definition.""" rl_id = self.vehicles.get_rl_ids()[0] lead_id = self.vehicles.get_leader(rl_id) or rl_id @@ -236,6 +259,7 @@ def get_state(self, **kwargs): return observation def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles rl_id = self.vehicles.get_rl_ids()[0] lead_id = self.vehicles.get_leader(rl_id) or rl_id diff --git a/flow/envs/merge.py b/flow/envs/merge.py index 6d572bff5..bbd0bb081 100644 --- a/flow/envs/merge.py +++ b/flow/envs/merge.py @@ -1,3 +1,10 @@ +""" +Environments for training vehicles to reduce congestion in a merge. + +This environment was used in: +TODO(ak): add paper after it has been published. +""" + from flow.envs.base_env import Env from flow.core import rewards @@ -19,8 +26,10 @@ class WaveAttenuationMergePOEnv(Env): - """Environment used to train autonomous vehicles to attenuate the formation - and propagation of waves in an open merge network. + """Partially observable merge environment. + + This environment is used to train autonomous vehicles to attenuate the + formation and propagation of waves in an open merge network. Required from env_params: @@ -81,6 +90,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -89,9 +99,11 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" return Box(low=0, high=1, shape=(5 * self.num_rl, ), dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" for i, rl_id in enumerate(self.rl_veh): # ignore rl vehicles outside the network if rl_id not in self.vehicles.get_rl_ids(): @@ -99,6 +111,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration([rl_id], [rl_actions[i]]) def get_state(self, rl_id=None, **kwargs): + """See class definition.""" self.leader = [] self.follower = [] @@ -140,6 +153,7 @@ def get_state(self, rl_id=None, **kwargs): return observation def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) else: @@ -168,11 +182,27 @@ def compute_reward(self, state, rl_actions, **kwargs): return max(eta1 * cost1 + eta2 * cost2, 0) def sort_by_position(self): + """See parent class. + + Sorting occurs by the ``get_x_by_id`` method instead of + ``get_absolute_position``. + """ # vehicles are sorted by their get_x_by_id value sorted_ids = sorted(self.vehicles.get_ids(), key=self.get_x_by_id) return sorted_ids, None def additional_command(self): + """See parent class. + + This method performs to auxiliary tasks: + + * Define which vehicles are observed for visualization purposes. + * Maintains the "rl_veh" and "rl_queue" variables to ensure the RL + vehicles that are represented in the state space does not change + until one of the vehicles in the state space leaves the network. + Then, the next vehicle in the queue is added to the state space and + provided with actions from the policy. 
+ """ # add rl vehicles that just entered the network into the rl queue for veh_id in self.vehicles.get_rl_ids(): if veh_id not in list(self.rl_queue) + self.rl_veh: @@ -196,6 +226,11 @@ def additional_command(self): self.vehicles.set_observed(veh_id) def reset(self): + """See parent class. + + In addition, a few variables that are specific to this class are + emptied before they are used by the new rollout. + """ self.leader = [] self.follower = [] return super().reset() diff --git a/flow/envs/test.py b/flow/envs/test.py index bfc8b2411..21483b8bf 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -1,3 +1,5 @@ +"""Test environment used to run simulations in the absence of autonomy.""" + from flow.envs.base_env import Env from gym.spaces.box import Box import numpy as np @@ -29,17 +31,22 @@ class and returns a real number. @property def action_space(self): + """See class definition.""" return Box(low=0, high=0, shape=0, dtype=np.float32) @property def observation_space(self): + """See class definition.""" return Box(low=0, high=0, shape=0, dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" return def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return 0 def get_state(self, **kwargs): + """See class definition.""" return np.array([])