diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 39d39a9b7..3d287f105 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -1,5 +1,7 @@ +"""Contains all callable environments in Flow.""" + from flow.envs.base_env import Env -from flow.envs.bay_bridge import BayBridgeEnv +from flow.envs.bay_bridge.base import BayBridgeEnv from flow.envs.bottleneck_env import BottleNeckAccelEnv, BottleneckEnv, \ DesiredVelocityEnv from flow.envs.green_wave_env import TrafficLightGridEnv, \ diff --git a/flow/envs/base_env.py b/flow/envs/base_env.py index 445f2b1e7..605aed2b1 100755 --- a/flow/envs/base_env.py +++ b/flow/envs/base_env.py @@ -1,3 +1,5 @@ +"""Base environment class. This is the parent of all other environments.""" + import logging import os import signal @@ -35,35 +37,36 @@ class Env(gym.Env, Serializable): + """Base environment class. + + Provides the interface for controlling a SUMO simulation. Using this + class, you can start sumo, provide a scenario to specify a + configuration and controllers, perform simulation steps, and reset the + simulation to an initial configuration. + + Env is Serializable to allow for pickling and replaying of the policy. + + This class cannot be used as is: you must extend it to implement an + action applicator method, and properties to define the MDP if you + choose to use it with an rl library (e.g. RLlib). This can be done by + overloading the following functions in a child class: + - action_space + - observation_space + - apply_rl_action + - get_state + - compute_reward + + Attributes + ---------- + env_params: EnvParams type: + see flow/core/params.py + sumo_params: SumoParams type + see flow/core/params.py + scenario: Scenario type + see flow/scenarios/base_scenario.py + """ + def __init__(self, env_params, sumo_params, scenario): - """Base environment class. - - Provides the interface for controlling a SUMO simulation. Using this - class, you can start sumo, provide a scenario to specify a - configuration and controllers, perform simulation steps, and reset the - simulation to an initial configuration. - - Env is Serializable to allow for pickling and replaying of the policy. - - This class cannot be used as is: you must extend it to implement an - action applicator method, and properties to define the MDP if you - choose to use it with an rl library (e.g. RLlib). This can be done by - overloading the following functions in a child class: - - action_space - - observation_space - - apply_rl_action - - get_state - - compute_reward - - Attributes - ---------- - env_params: EnvParams type: - see flow/core/params.py - sumo_params: SumoParams type - see flow/core/params.py - scenario: Scenario type - see flow/scenarios/base_scenario.py - """ # Invoke serializable if using rllab if Serializable is not object: Serializable.quick_init(self, locals()) @@ -128,7 +131,7 @@ def __init__(self, env_params, sumo_params, scenario): self.setup_initial_state() def restart_sumo(self, sumo_params, sumo_binary=None): - """Restarts an already initialized sumo instance. + """Restart an already initialized sumo instance. This is used when visualizing a rollout, in order to update the sumo_binary with potentially a gui and export emission data from sumo. @@ -157,7 +160,7 @@ def restart_sumo(self, sumo_params, sumo_binary=None): self.setup_initial_state() def start_sumo(self): - """Starts a sumo instance. + """Start a sumo instance. Uses the configuration files created by the generator class to initialize a sumo instance. 
Also initializes a traci connection to @@ -254,11 +257,11 @@ def start_sumo(self): raise error def setup_initial_state(self): - """Returns information on the initial state of the vehicles in the - network, to be used upon reset. + """Return information on the initial state of vehicles in the network. - Also adds initial state information to the self.vehicles class and - starts a subscription with sumo to collect state information each step. + This information is to be used upon reset. This method also adds this + information to the self.vehicles class and starts a subscription with + sumo to collect state information each step. Returns ------- @@ -348,7 +351,7 @@ def setup_initial_state(self): self.vehicles.update(vehicle_obs, id_lists, self) def step(self, rl_actions): - """Advances the environment by one step. + """Advance the environment by one step. Assigns actions to autonomous and human-driven agents (i.e. vehicles, traffic lights, etc...). Actions that are not assigned are left to the @@ -460,7 +463,7 @@ def step(self, rl_actions): return next_observation, reward, crash, {} def reset(self): - """Resets the environment. + """Reset the environment. This method is performed in between rollouts. It resets the state of the environment, and re-initializes the vehicles in their starting @@ -619,7 +622,7 @@ def additional_command(self): pass def apply_rl_actions(self, rl_actions=None): - """Specifies the actions to be performed by the rl agent(s). + """Specify the actions to be performed by the rl agent(s). If no actions are provided at any given step, the rl agents default to performing actions specified by sumo. @@ -646,7 +649,7 @@ def _apply_rl_actions(self, rl_actions): raise NotImplementedError def apply_acceleration(self, veh_ids, acc): - """Applies the acceleration requested by a vehicle in sumo. + """Apply the acceleration requested by a vehicle in sumo. Note that, if the sumo-specified speed mode of the vehicle is not "aggressive", the acceleration may be clipped by some safety velocity @@ -666,8 +669,12 @@ def apply_acceleration(self, veh_ids, acc): self.traci_connection.vehicle.slowDown(vid, next_vel, 1) def apply_lane_change(self, veh_ids, direction): - """Applies an instantaneous lane-change to a set of vehicles, while - preventing vehicles from moving to lanes that do not exist. + """Apply an instantaneous lane-change to a set of vehicles. + + This method also prevents vehicles from moving to lanes that do not + exist, and sets the "last_lc" variable for RL vehicles that changed + lanes to match the current time step, in order to assist in maintaining + a lane change duration for these vehicles. Parameters ---------- @@ -711,7 +718,7 @@ def apply_lane_change(self, veh_ids, direction): self.vehicles.get_state(veh_id, "last_lc") def choose_routes(self, veh_ids, route_choices): - """Updates the route choice of vehicles in the network. + """Update the route choice of vehicles in the network. Parameters ---------- @@ -728,8 +735,11 @@ def choose_routes(self, veh_ids, route_choices): vehID=veh_id, edgeList=route_choices[i]) def get_x_by_id(self, veh_id): - """Provides a 1-dimensional representation of the position of a vehicle - in the network. + """Provide a 1-D representation of the position of a vehicle. + + Note: These values are only meaningful if the specify_edge_starts + method in the scenario is set appropriately; otherwise, a value of 0 is + returned for all vehicles.
Parameters ---------- @@ -748,7 +758,7 @@ def get_x_by_id(self, veh_id): self.vehicles.get_edge(veh_id), self.vehicles.get_position(veh_id)) def sort_by_position(self): - """Sorts the vehicle ids of vehicles in the network by position. + """Sort the vehicle ids of vehicles in the network by position. The base environment does this by sorting vehicles by their absolute position. @@ -771,7 +781,7 @@ def sort_by_position(self): return self.vehicles.get_ids(), None def update_vehicle_colors(self): - """Modifies the color of vehicles if rendering is active. + """Modify the color of vehicles if rendering is active. The colors of all vehicles are updated as follows: - red: autonomous (rl) vehicles @@ -809,7 +819,7 @@ def update_vehicle_colors(self): self.vehicles.remove_observed(veh_id) def get_state(self): - """Returns the state of the simulation as perceived by the RL agent. + """Return the state of the simulation as perceived by the RL agent. MUST BE implemented in new environments. @@ -823,8 +833,7 @@ def get_state(self): @property def action_space(self): - """Identifies the dimensions and bounds of the action space (needed for - gym environments). + """Identify the dimensions and bounds of the action space. MUST BE implemented in new environments. @@ -837,8 +846,7 @@ def action_space(self): @property def observation_space(self): - """Identifies the dimensions and bounds of the observation space - (needed for gym environments). + """Identify the dimensions and bounds of the observation space. MUST BE implemented in new environments. @@ -873,7 +881,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return 0 def terminate(self): - """Closes the TraCI I/O connection. + """Close the TraCI I/O connection. Should be done at end of every experiment. Must be in Env because the environment opens the TraCI connection. @@ -885,6 +893,7 @@ def _close(self): self.scenario.close() def teardown_sumo(self): + """Kill the sumo subprocess instance.""" try: os.killpg(self.sumo_proc.pid, signal.SIGTERM) except Exception: @@ -894,4 +903,5 @@ def _seed(self, seed=None): return [] def render(self, mode='human'): + """See parent class (gym.Env).""" pass diff --git a/flow/envs/bay_bridge/__init__.py b/flow/envs/bay_bridge/__init__.py deleted file mode 100644 index d5802d7e0..000000000 --- a/flow/envs/bay_bridge/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from flow.envs.bay_bridge.base import BayBridgeEnv - -__all__ = ["BayBridgeEnv"] diff --git a/flow/envs/bay_bridge/base.py b/flow/envs/bay_bridge/base.py index 8cb188d8f..411f0bfe4 100644 --- a/flow/envs/bay_bridge/base.py +++ b/flow/envs/bay_bridge/base.py @@ -209,12 +209,15 @@ def apply_toll_bridge_control(self): # TODO: decide on a good reward function def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) """ The below methods need to be updated by child classes. """ def _apply_rl_actions(self, rl_actions): + """Implemented by child classes.""" pass def get_state(self): + """Implemented by child classes.""" return [] diff --git a/flow/envs/bottleneck_env.py b/flow/envs/bottleneck_env.py index bf8befdef..10fe5fd08 100644 --- a/flow/envs/bottleneck_env.py +++ b/flow/envs/bottleneck_env.py @@ -1,3 +1,10 @@ +""" +Environments for training vehicles to reduce capacity drops in a bottleneck. + +This environment was used in: +TODO(ak): add paper after it has been published. 
+""" + from flow.controllers.rlcontroller import RLController from flow.controllers.lane_change_controllers import SumoLaneChangeController from flow.controllers.routing_controllers import ContinuousRouter @@ -347,6 +354,7 @@ def get_avg_bottleneck_velocity(self): # Dummy action and observation spaces @property def action_space(self): + """See class definition.""" return Box( low=-float("inf"), high=float("inf"), @@ -355,6 +363,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" return Box( low=-float("inf"), high=float("inf"), @@ -369,6 +378,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return reward def get_state(self): + """See class definition.""" return np.asarray([1]) @@ -411,6 +421,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" num_edges = len(self.scenario.get_edge_list()) num_rl_veh = self.num_rl num_obs = 2 * num_edges + 4 * MAX_LANES * self.scaling \ @@ -419,6 +430,7 @@ def observation_space(self): return Box(low=0, high=1, shape=(num_obs, ), dtype=np.float32) def get_state(self): + """See class definition.""" headway_scale = 1000 rl_ids = self.vehicles.get_rl_ids() @@ -521,6 +533,7 @@ def get_state(self): return np.concatenate((rl_obs, relative_obs, edge_obs)) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" num_rl = self.vehicles.num_rl_vehicles lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl]) return (rewards.desired_velocity(self) + rewards.rl_forward_progress( @@ -536,7 +549,7 @@ def sort_by_position(self): def _apply_rl_actions(self, actions): """ - See parent class + See parent class. Takes a tuple and applies a lane change or acceleration. if a lane change is applied, don't issue any commands @@ -711,6 +724,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" num_obs = 0 # density and velocity for rl and non-rl vehicles per segment # Last element is the outflow @@ -721,6 +735,7 @@ def observation_space(self): @property def action_space(self): + """See class definition.""" if self.symmetric: action_size = self.total_controlled_segments else: @@ -733,6 +748,7 @@ def action_space(self): low=-1.5, high=1.0, shape=(int(action_size), ), dtype=np.float32) def get_state(self): + """See class definition.""" # action space is number of vehicles in each segment in each lane, # number of rl vehicles in each segment in each lane # mean speed in each segment, and mean rl speed in each @@ -829,7 +845,7 @@ def _apply_rl_actions(self, rl_actions): self.traci_connection.vehicle.setMaxSpeed(rl_id, 23.0) def compute_reward(self, state, rl_actions, **kwargs): - """ Outflow rate over last ten seconds normalized to max of 1 """ + """Outflow rate over last ten seconds normalized to max of 1.""" if self.env_params.evaluate: reward = self.vehicles.get_outflow_rate(500) diff --git a/flow/envs/green_wave_env.py b/flow/envs/green_wave_env.py index 5cdcf3eef..1727590c9 100644 --- a/flow/envs/green_wave_env.py +++ b/flow/envs/green_wave_env.py @@ -104,6 +104,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" if self.discrete: return Discrete(2 ** self.num_traffic_lights) else: @@ -115,6 +116,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=1, @@ -138,6 +140,7 @@ def observation_space(self): 
return Tuple((speed, dist_to_intersec, edge_num, traffic_lights)) def get_state(self): + """See class definition.""" # compute the normalizers max_dist = max(self.scenario.short_length, self.scenario.long_length, self.scenario.inner_length) @@ -164,6 +167,7 @@ def get_state(self): return np.array(state) def _apply_rl_actions(self, rl_actions): + """See class definition.""" # check if the action space is discrete if self.discrete: # convert single value to list of 0's and 1's @@ -208,6 +212,7 @@ def _apply_rl_actions(self, rl_actions): self.last_change[i, 2] = 0 def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return rewards.penalize_tl_changes(rl_actions >= 0.5, gain=1.0) # =============================== @@ -468,8 +473,10 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): """ - Partial observed state space. Velocities, distance to intersections, - edge number (for nearby vehicles) traffic light state + Partially observed state space. + + Velocities, distance to intersections, edge number (for nearby + vehicles), and traffic light state. """ tl_box = Box( low=0., @@ -547,12 +554,14 @@ def get_state(self): ])) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return rewards.min_delay_unscaled(self) else: return rewards.desired_velocity(self, fail=kwargs["fail"]) def additional_command(self): + """See class definition.""" # specify observed vehicles [self.vehicles.set_observed(veh_id) for veh_id in self.observed_ids] @@ -564,7 +573,9 @@ class GreenWaveTestEnv(TrafficLightGridEnv): """ def _apply_rl_actions(self, rl_actions): + """See class definition.""" pass def compute_reward(self, state, rl_actions, **kwargs): + """No return, for testing purposes.""" return 0 diff --git a/flow/envs/loop/lane_changing.py b/flow/envs/loop/lane_changing.py index f6f07efa1..55dce714d 100755 --- a/flow/envs/loop/lane_changing.py +++ b/flow/envs/loop/lane_changing.py @@ -1,3 +1,5 @@ +"""Environments that can train both lane change and acceleration behaviors.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -20,8 +22,11 @@ class LaneChangeAccelEnv(Env): - """Environment used to train autonomous vehicles to improve traffic flows - when lane-change and acceleration actions are permitted by the rl agent. + """Fully observable lane change and acceleration environment. + + This environment is used to train autonomous vehicles to improve traffic + flows when lane-change and acceleration actions are permitted by the rl + agent. 
Required from env_params: @@ -66,6 +71,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" max_decel = self.env_params.additional_params["max_decel"] max_accel = self.env_params.additional_params["max_accel"] @@ -76,6 +82,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=1, @@ -94,6 +101,7 @@ def observation_space(self): return Tuple((speed, pos, lane)) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" # compute the system-level performance of vehicles from a velocity # perspective reward = rewards.desired_velocity(self, fail=kwargs["fail"]) @@ -107,6 +115,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return reward def get_state(self): + """See class definition.""" # normalizers max_speed = self.scenario.max_speed length = self.scenario.length @@ -121,6 +130,7 @@ def get_state(self): ] for veh_id in self.sorted_ids]) def _apply_rl_actions(self, actions): + """See class definition.""" acceleration = actions[::2] direction = actions[1::2] @@ -144,6 +154,7 @@ def _apply_rl_actions(self, actions): self.apply_lane_change(sorted_rl_ids, direction=direction) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): @@ -189,6 +200,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" return Box( low=0, high=1, @@ -197,6 +209,7 @@ def observation_space(self): dtype=np.float32) def get_state(self): + """See class definition.""" obs = [ 0 for _ in range(4 * self.vehicles.num_rl_vehicles * self.num_lanes) @@ -247,6 +260,7 @@ def get_state(self): return np.array(obs) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles for veh_id in self.visible: self.vehicles.set_observed(veh_id) diff --git a/flow/envs/loop/loop_accel.py b/flow/envs/loop/loop_accel.py index bfd3cf12b..42810f6e3 100755 --- a/flow/envs/loop/loop_accel.py +++ b/flow/envs/loop/loop_accel.py @@ -1,3 +1,5 @@ +"""Environment for training the acceleration behavior of vehicles in a loop.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -17,7 +19,9 @@ class AccelEnv(Env): - """Environment used to train autonomous vehicles to improve traffic flows + """Fully observed acceleration environment. + + This environment is used to train autonomous vehicles to improve traffic flows when acceleration actions are permitted by the rl agent.
Required from env_params: @@ -54,6 +58,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -62,6 +67,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" self.obs_var_labels = ["Velocity", "Absolute_pos"] speed = Box( low=0, @@ -76,6 +82,7 @@ def observation_space(self): return Tuple((speed, pos)) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -83,12 +90,14 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) else: return rewards.desired_velocity(self, fail=kwargs["fail"]) def get_state(self, **kwargs): + """See class definition.""" # speed normalizer max_speed = self.scenario.max_speed @@ -98,6 +107,7 @@ def get_state(self, **kwargs): ] for veh_id in self.sorted_ids]) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): diff --git a/flow/envs/loop/loop_merges.py b/flow/envs/loop/loop_merges.py index 658be2622..a88f75770 100755 --- a/flow/envs/loop/loop_merges.py +++ b/flow/envs/loop/loop_merges.py @@ -1,3 +1,5 @@ +"""Environment for training cooperative merging behaviors in a loop merge.""" + from flow.envs.base_env import Env from flow.core import rewards @@ -23,8 +25,7 @@ class TwoLoopsMergePOEnv(Env): - """Environment for training cooperative merging behavior in a partially - observable closed loop merge scenario. + """Environment for training cooperative merging behaviors in a loop merge. WARNING: only supports 1 RL vehicle @@ -81,6 +82,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def observation_space(self): + """See class definition.""" speed = Box( low=0, high=np.inf, @@ -99,6 +101,7 @@ def observation_space(self): @property def action_space(self): + """See class definition.""" return Box( low=-np.abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -106,6 +109,7 @@ def action_space(self): dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -113,6 +117,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" vel_reward = rewards.desired_velocity(self, fail=kwargs["fail"]) # Use a similar weighting of of the headway reward as the velocity @@ -127,6 +132,7 @@ def compute_reward(self, state, rl_actions, **kwargs): return vel_reward + headway_reward def get_state(self, **kwargs): + """See class definition.""" vel = np.zeros(self.n_obs_vehicles) pos = np.zeros(self.n_obs_vehicles) @@ -206,7 +212,7 @@ def get_state(self, **kwargs): def sort_by_position(self): """ - See parent class + See parent class. 
Instead of being sorted by a global reference, vehicles in this environment are sorted with regards to which ring this currently diff --git a/flow/envs/loop/wave_attenuation.py b/flow/envs/loop/wave_attenuation.py index 5f44fa685..8be89b693 100644 --- a/flow/envs/loop/wave_attenuation.py +++ b/flow/envs/loop/wave_attenuation.py @@ -1,3 +1,13 @@ +""" +Environment used to train a stop-and-go dissipating controller. + +This is the environment that was used in: + +C. Wu, A. Kreidieh, K. Parvate, E. Vinitsky, A. Bayen, "Flow: Architecture and +Benchmarking for Reinforcement Learning in Traffic Control," CoRR, vol. +abs/1710.05465, 2017. [Online]. Available: https://arxiv.org/abs/1710.05465 +""" + from flow.envs.base_env import Env from flow.core.params import InitialConfig, NetParams @@ -20,8 +30,10 @@ class WaveAttenuationEnv(Env): - """Environment used to train autonomous vehicles to attenuate the formation - and propagation of waves in a variable density ring road. + """Fully observable wave attenuation environment. + + This environment is used to train autonomous vehicles to attenuate the + formation and propagation of waves in a variable density ring road. Required from env_params: @@ -57,6 +69,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-np.abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -65,6 +78,7 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" self.obs_var_labels = ["Velocity", "Absolute_pos"] speed = Box( low=0, @@ -79,6 +93,7 @@ def observation_space(self): return Tuple((speed, pos)) def _apply_rl_actions(self, rl_actions): + """See class definition.""" sorted_rl_ids = [ veh_id for veh_id in self.sorted_ids if veh_id in self.vehicles.get_rl_ids() @@ -86,6 +101,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration(sorted_rl_ids, rl_actions) def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" # in the warmup steps if rl_actions is None: return 0 @@ -113,20 +129,25 @@ def compute_reward(self, state, rl_actions, **kwargs): return float(reward) def get_state(self, **kwargs): + """See class definition.""" return np.array([[ self.vehicles.get_speed(veh_id) / self.scenario.max_speed, self.get_x_by_id(veh_id) / self.scenario.length ] for veh_id in self.sorted_ids]) def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles if self.vehicles.num_rl_vehicles > 0: for veh_id in self.vehicles.get_human_ids(): self.vehicles.set_observed(veh_id) def reset(self): - """The sumo instance is reset with a new ring length, and a number of - steps are performed with the rl vehicle acting as a human vehicle.""" + """See parent class. + + The sumo instance is reset with a new ring length, and a number of + steps are performed with the rl vehicle acting as a human vehicle. 
+ """ # update the scenario initial_config = InitialConfig(bunching=50, min_gap=0) additional_net_params = { @@ -216,9 +237,11 @@ class WaveAttenuationPOEnv(WaveAttenuationEnv): @property def observation_space(self): + """See class definition.""" return Box(low=0, high=1, shape=(3, ), dtype=np.float32) def get_state(self, **kwargs): + """See class definition.""" rl_id = self.vehicles.get_rl_ids()[0] lead_id = self.vehicles.get_leader(rl_id) or rl_id @@ -236,6 +259,7 @@ def get_state(self, **kwargs): return observation def additional_command(self): + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles rl_id = self.vehicles.get_rl_ids()[0] lead_id = self.vehicles.get_leader(rl_id) or rl_id diff --git a/flow/envs/merge.py b/flow/envs/merge.py index 6d572bff5..bbd0bb081 100644 --- a/flow/envs/merge.py +++ b/flow/envs/merge.py @@ -1,3 +1,10 @@ +""" +Environments for training vehicles to reduce congestion in a merge. + +This environment was used in: +TODO(ak): add paper after it has been published. +""" + from flow.envs.base_env import Env from flow.core import rewards @@ -19,8 +26,10 @@ class WaveAttenuationMergePOEnv(Env): - """Environment used to train autonomous vehicles to attenuate the formation - and propagation of waves in an open merge network. + """Partially observable merge environment. + + This environment is used to train autonomous vehicles to attenuate the + formation and propagation of waves in an open merge network. Required from env_params: @@ -81,6 +90,7 @@ def __init__(self, env_params, sumo_params, scenario): @property def action_space(self): + """See class definition.""" return Box( low=-abs(self.env_params.additional_params["max_decel"]), high=self.env_params.additional_params["max_accel"], @@ -89,9 +99,11 @@ def action_space(self): @property def observation_space(self): + """See class definition.""" return Box(low=0, high=1, shape=(5 * self.num_rl, ), dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" for i, rl_id in enumerate(self.rl_veh): # ignore rl vehicles outside the network if rl_id not in self.vehicles.get_rl_ids(): @@ -99,6 +111,7 @@ def _apply_rl_actions(self, rl_actions): self.apply_acceleration([rl_id], [rl_actions[i]]) def get_state(self, rl_id=None, **kwargs): + """See class definition.""" self.leader = [] self.follower = [] @@ -140,6 +153,7 @@ def get_state(self, rl_id=None, **kwargs): return observation def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" if self.env_params.evaluate: return np.mean(self.vehicles.get_speed(self.vehicles.get_ids())) else: @@ -168,11 +182,27 @@ def compute_reward(self, state, rl_actions, **kwargs): return max(eta1 * cost1 + eta2 * cost2, 0) def sort_by_position(self): + """See parent class. + + Sorting occurs by the ``get_x_by_id`` method instead of + ``get_absolute_position``. + """ # vehicles are sorted by their get_x_by_id value sorted_ids = sorted(self.vehicles.get_ids(), key=self.get_x_by_id) return sorted_ids, None def additional_command(self): + """See parent class. + + This method performs to auxiliary tasks: + + * Define which vehicles are observed for visualization purposes. + * Maintains the "rl_veh" and "rl_queue" variables to ensure the RL + vehicles that are represented in the state space does not change + until one of the vehicles in the state space leaves the network. + Then, the next vehicle in the queue is added to the state space and + provided with actions from the policy. 
+ """ # add rl vehicles that just entered the network into the rl queue for veh_id in self.vehicles.get_rl_ids(): if veh_id not in list(self.rl_queue) + self.rl_veh: @@ -196,6 +226,11 @@ def additional_command(self): self.vehicles.set_observed(veh_id) def reset(self): + """See parent class. + + In addition, a few variables that are specific to this class are + emptied before they are used by the new rollout. + """ self.leader = [] self.follower = [] return super().reset() diff --git a/flow/envs/test.py b/flow/envs/test.py index bfc8b2411..21483b8bf 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -1,3 +1,5 @@ +"""Test environment used to run simulations in the absence of autonomy.""" + from flow.envs.base_env import Env from gym.spaces.box import Box import numpy as np @@ -29,17 +31,22 @@ class and returns a real number. @property def action_space(self): + """See class definition.""" return Box(low=0, high=0, shape=0, dtype=np.float32) @property def observation_space(self): + """See class definition.""" return Box(low=0, high=0, shape=0, dtype=np.float32) def _apply_rl_actions(self, rl_actions): + """See class definition.""" return def compute_reward(self, state, rl_actions, **kwargs): + """See class definition.""" return 0 def get_state(self, **kwargs): + """See class definition.""" return np.array([])