Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion flow/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Contains all callable environments in Flow."""

from flow.envs.base_env import Env
from flow.envs.bay_bridge import BayBridgeEnv
from flow.envs.bay_bridge.base import BayBridgeEnv
from flow.envs.bottleneck_env import BottleNeckAccelEnv, BottleneckEnv, \
DesiredVelocityEnv
from flow.envs.green_wave_env import TrafficLightGridEnv, \
Expand Down
112 changes: 61 additions & 51 deletions flow/envs/base_env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Base environment class. This is the parent of all other environments."""

import logging
import os
import signal
Expand Down Expand Up @@ -35,35 +37,36 @@


class Env(gym.Env, Serializable):
"""Base environment class.

Provides the interface for controlling a SUMO simulation. Using this
class, you can start sumo, provide a scenario to specify a
configuration and controllers, perform simulation steps, and reset the
simulation to an initial configuration.

Env is Serializable to allow for pickling and replaying of the policy.

This class cannot be used as is: you must extend it to implement an
action applicator method, and properties to define the MDP if you
choose to use it with an rl library (e.g. RLlib). This can be done by
overloading the following functions in a child class:
- action_space
- observation_space
- apply_rl_action
- get_state
- compute_reward

Attributes
----------
env_params: EnvParams type:
see flow/core/params.py
sumo_params: SumoParams type
see flow/core/params.py
scenario: Scenario type
see flow/scenarios/base_scenario.py
"""

def __init__(self, env_params, sumo_params, scenario):
"""Base environment class.

Provides the interface for controlling a SUMO simulation. Using this
class, you can start sumo, provide a scenario to specify a
configuration and controllers, perform simulation steps, and reset the
simulation to an initial configuration.

Env is Serializable to allow for pickling and replaying of the policy.

This class cannot be used as is: you must extend it to implement an
action applicator method, and properties to define the MDP if you
choose to use it with an rl library (e.g. RLlib). This can be done by
overloading the following functions in a child class:
- action_space
- observation_space
- apply_rl_action
- get_state
- compute_reward

Attributes
----------
env_params: EnvParams type:
see flow/core/params.py
sumo_params: SumoParams type
see flow/core/params.py
scenario: Scenario type
see flow/scenarios/base_scenario.py
"""
# Invoke serializable if using rllab
if Serializable is not object:
Serializable.quick_init(self, locals())
Expand Down Expand Up @@ -128,7 +131,7 @@ def __init__(self, env_params, sumo_params, scenario):
self.setup_initial_state()

def restart_sumo(self, sumo_params, sumo_binary=None):
"""Restarts an already initialized sumo instance.
"""Restart an already initialized sumo instance.

This is used when visualizing a rollout, in order to update the
sumo_binary with potentially a gui and export emission data from sumo.
Expand Down Expand Up @@ -157,7 +160,7 @@ def restart_sumo(self, sumo_params, sumo_binary=None):
self.setup_initial_state()

def start_sumo(self):
"""Starts a sumo instance.
"""Start a sumo instance.

Uses the configuration files created by the generator class to
initialize a sumo instance. Also initializes a traci connection to
Expand Down Expand Up @@ -254,11 +257,11 @@ def start_sumo(self):
raise error

def setup_initial_state(self):
"""Returns information on the initial state of the vehicles in the
network, to be used upon reset.
"""Return information on the initial state of vehicles in the network.

Also adds initial state information to the self.vehicles class and
starts a subscription with sumo to collect state information each step.
This information is to be used upon reset. This method also adds this
information to the self.vehicles class and starts a subscription with
sumo to collect state information each step.

Returns
-------
Expand Down Expand Up @@ -348,7 +351,7 @@ def setup_initial_state(self):
self.vehicles.update(vehicle_obs, id_lists, self)

def step(self, rl_actions):
"""Advances the environment by one step.
"""Advance the environment by one step.

Assigns actions to autonomous and human-driven agents (i.e. vehicles,
traffic lights, etc...). Actions that are not assigned are left to the
Expand Down Expand Up @@ -460,7 +463,7 @@ def step(self, rl_actions):
return next_observation, reward, crash, {}

def reset(self):
"""Resets the environment.
"""Reset the environment.

This method is performed in between rollouts. It resets the state of
the environment, and re-initializes the vehicles in their starting
Expand Down Expand Up @@ -619,7 +622,7 @@ def additional_command(self):
pass

def apply_rl_actions(self, rl_actions=None):
"""Specifies the actions to be performed by the rl agent(s).
"""Specify the actions to be performed by the rl agent(s).

If no actions are provided at any given step, the rl agents default to
performing actions specified by sumo.
Expand All @@ -646,7 +649,7 @@ def _apply_rl_actions(self, rl_actions):
raise NotImplementedError

def apply_acceleration(self, veh_ids, acc):
"""Applies the acceleration requested by a vehicle in sumo.
"""Apply the acceleration requested by a vehicle in sumo.

Note that, if the sumo-specified speed mode of the vehicle is not
"aggressive", the acceleration may be clipped by some safety velocity
Expand All @@ -666,8 +669,12 @@ def apply_acceleration(self, veh_ids, acc):
self.traci_connection.vehicle.slowDown(vid, next_vel, 1)

def apply_lane_change(self, veh_ids, direction):
"""Applies an instantaneous lane-change to a set of vehicles, while
preventing vehicles from moving to lanes that do not exist.
"""Apply an instantaneous lane-change to a set of vehicles.

This method also prevents vehicles from moving to lanes that do not
exist, and sets the "last_lc" variable for RL vehicles that performed a
lane change to match the current time step, in order to assist in
maintaining a lane change duration for these vehicles.

Parameters
----------
Expand Down Expand Up @@ -711,7 +718,7 @@ def apply_lane_change(self, veh_ids, direction):
self.vehicles.get_state(veh_id, "last_lc")

def choose_routes(self, veh_ids, route_choices):
"""Updates the route choice of vehicles in the network.
"""Update the route choice of vehicles in the network.

Parameters
----------
Expand All @@ -728,8 +735,11 @@ def choose_routes(self, veh_ids, route_choices):
vehID=veh_id, edgeList=route_choices[i])

def get_x_by_id(self, veh_id):
"""Provides a 1-dimensional representation of the position of a vehicle
in the network.
"""Provide a 1-D representation of the position of a vehicle.

Note: These values are only meaningful if the specify_edge_starts
method in the scenario is set appropriately; otherwise, a value of 0 is
returned for all vehicles.

Parameters
----------
Expand All @@ -748,7 +758,7 @@ def get_x_by_id(self, veh_id):
self.vehicles.get_edge(veh_id), self.vehicles.get_position(veh_id))

def sort_by_position(self):
"""Sorts the vehicle ids of vehicles in the network by position.
"""Sort the vehicle ids of vehicles in the network by position.

The base environment does this by sorting vehicles by their absolute
position.
Expand All @@ -771,7 +781,7 @@ def sort_by_position(self):
return self.vehicles.get_ids(), None

def update_vehicle_colors(self):
"""Modifies the color of vehicles if rendering is active.
"""Modify the color of vehicles if rendering is active.

The colors of all vehicles are updated as follows:
- red: autonomous (rl) vehicles
Expand Down Expand Up @@ -809,7 +819,7 @@ def update_vehicle_colors(self):
self.vehicles.remove_observed(veh_id)

def get_state(self):
"""Returns the state of the simulation as perceived by the RL agent.
"""Return the state of the simulation as perceived by the RL agent.

MUST BE implemented in new environments.

Expand All @@ -823,8 +833,7 @@ def get_state(self):

@property
def action_space(self):
"""Identifies the dimensions and bounds of the action space (needed for
gym environments).
"""Identify the dimensions and bounds of the action space.

MUST BE implemented in new environments.

Expand All @@ -837,8 +846,7 @@ def action_space(self):

@property
def observation_space(self):
"""Identifies the dimensions and bounds of the observation space
(needed for gym environments).
"""Identify the dimensions and bounds of the observation space.

MUST BE implemented in new environments.

Expand Down Expand Up @@ -873,7 +881,7 @@ def compute_reward(self, state, rl_actions, **kwargs):
return 0

def terminate(self):
"""Closes the TraCI I/O connection.
"""Close the TraCI I/O connection.

Should be done at end of every experiment. Must be in Env because the
environment opens the TraCI connection.
Expand All @@ -885,6 +893,7 @@ def _close(self):
self.scenario.close()

def teardown_sumo(self):
"""Kill the sumo subprocess instance."""
try:
os.killpg(self.sumo_proc.pid, signal.SIGTERM)
except Exception:
Expand All @@ -894,4 +903,5 @@ def _seed(self, seed=None):
return []

def render(self, mode='human'):
"""See parent class (gym.Env)."""
pass
3 changes: 0 additions & 3 deletions flow/envs/bay_bridge/__init__.py

This file was deleted.

3 changes: 3 additions & 0 deletions flow/envs/bay_bridge/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,15 @@ def apply_toll_bridge_control(self):

# TODO: decide on a good reward function
def compute_reward(self, state, rl_actions, **kwargs):
"""See class definition."""
return np.mean(self.vehicles.get_speed(self.vehicles.get_ids()))

""" The below methods need to be updated by child classes. """

def _apply_rl_actions(self, rl_actions):
"""Implemented by child classes."""
pass

def get_state(self):
"""Implemented by child classes."""
return []
20 changes: 18 additions & 2 deletions flow/envs/bottleneck_env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
"""
Environments for training vehicles to reduce capacity drops in a bottleneck.

This environment was used in:
TODO(ak): add paper after it has been published.
"""

from flow.controllers.rlcontroller import RLController
from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.routing_controllers import ContinuousRouter
Expand Down Expand Up @@ -347,6 +354,7 @@ def get_avg_bottleneck_velocity(self):
# Dummy action and observation spaces
@property
def action_space(self):
"""See class definition."""
return Box(
low=-float("inf"),
high=float("inf"),
Expand All @@ -355,6 +363,7 @@ def action_space(self):

@property
def observation_space(self):
"""See class definition."""
return Box(
low=-float("inf"),
high=float("inf"),
Expand All @@ -369,6 +378,7 @@ def compute_reward(self, state, rl_actions, **kwargs):
return reward

def get_state(self):
"""See class definition."""
return np.asarray([1])


Expand Down Expand Up @@ -411,6 +421,7 @@ def __init__(self, env_params, sumo_params, scenario):

@property
def observation_space(self):
"""See class definition."""
num_edges = len(self.scenario.get_edge_list())
num_rl_veh = self.num_rl
num_obs = 2 * num_edges + 4 * MAX_LANES * self.scaling \
Expand All @@ -419,6 +430,7 @@ def observation_space(self):
return Box(low=0, high=1, shape=(num_obs, ), dtype=np.float32)

def get_state(self):
"""See class definition."""
headway_scale = 1000

rl_ids = self.vehicles.get_rl_ids()
Expand Down Expand Up @@ -521,6 +533,7 @@ def get_state(self):
return np.concatenate((rl_obs, relative_obs, edge_obs))

def compute_reward(self, state, rl_actions, **kwargs):
"""See class definition."""
num_rl = self.vehicles.num_rl_vehicles
lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl])
return (rewards.desired_velocity(self) + rewards.rl_forward_progress(
Expand All @@ -536,7 +549,7 @@ def sort_by_position(self):

def _apply_rl_actions(self, actions):
"""
See parent class
See parent class.

Takes a tuple and applies a lane change or acceleration. If a lane
change is applied, don't issue any commands
Expand Down Expand Up @@ -711,6 +724,7 @@ def __init__(self, env_params, sumo_params, scenario):

@property
def observation_space(self):
"""See class definition."""
num_obs = 0
# density and velocity for rl and non-rl vehicles per segment
# Last element is the outflow
Expand All @@ -721,6 +735,7 @@ def observation_space(self):

@property
def action_space(self):
"""See class definition."""
if self.symmetric:
action_size = self.total_controlled_segments
else:
Expand All @@ -733,6 +748,7 @@ def action_space(self):
low=-1.5, high=1.0, shape=(int(action_size), ), dtype=np.float32)

def get_state(self):
"""See class definition."""
# action space is number of vehicles in each segment in each lane,
# number of rl vehicles in each segment in each lane
# mean speed in each segment, and mean rl speed in each
Expand Down Expand Up @@ -829,7 +845,7 @@ def _apply_rl_actions(self, rl_actions):
self.traci_connection.vehicle.setMaxSpeed(rl_id, 23.0)

def compute_reward(self, state, rl_actions, **kwargs):
""" Outflow rate over last ten seconds normalized to max of 1 """
"""Outflow rate over last ten seconds normalized to max of 1."""

if self.env_params.evaluate:
reward = self.vehicles.get_outflow_rate(500)
Expand Down
Loading