Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion flow/envs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Contains all callable environments in Flow."""

from flow.envs.base_env import Env
from flow.envs.bay_bridge import BayBridgeEnv
from flow.envs.bay_bridge.base import BayBridgeEnv
from flow.envs.bottleneck_env import BottleNeckAccelEnv, BottleneckEnv, \
DesiredVelocityEnv
from flow.envs.green_wave_env import TrafficLightGridEnv, \
Expand Down
112 changes: 61 additions & 51 deletions flow/envs/base_env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Base environment class. This is the parent of all other environments."""

import logging
import os
import signal
Expand Down Expand Up @@ -35,35 +37,36 @@


class Env(gym.Env, Serializable):
"""Base environment class.

Provides the interface for controlling a SUMO simulation. Using this
class, you can start sumo, provide a scenario to specify a
configuration and controllers, perform simulation steps, and reset the
simulation to an initial configuration.

Env is Serializable to allow for pickling and replaying of the policy.

This class cannot be used as is: you must extend it to implement an
action applicator method, and properties to define the MDP if you
choose to use it with an rl library (e.g. RLlib). This can be done by
overloading the following functions in a child class:
- action_space
- observation_space
- apply_rl_action
- get_state
- compute_reward

Attributes
----------
env_params: EnvParams type:
see flow/core/params.py
sumo_params: SumoParams type
see flow/core/params.py
scenario: Scenario type
see flow/scenarios/base_scenario.py
"""

def __init__(self, env_params, sumo_params, scenario):
"""Base environment class.

Provides the interface for controlling a SUMO simulation. Using this
class, you can start sumo, provide a scenario to specify a
configuration and controllers, perform simulation steps, and reset the
simulation to an initial configuration.

Env is Serializable to allow for pickling and replaying of the policy.

This class cannot be used as is: you must extend it to implement an
action applicator method, and properties to define the MDP if you
choose to use it with an rl library (e.g. RLlib). This can be done by
overloading the following functions in a child class:
- action_space
- observation_space
- apply_rl_action
- get_state
- compute_reward

Attributes
----------
env_params: EnvParams type:
see flow/core/params.py
sumo_params: SumoParams type
see flow/core/params.py
scenario: Scenario type
see flow/scenarios/base_scenario.py
"""
# Invoke serializable if using rllab
if Serializable is not object:
Serializable.quick_init(self, locals())
Expand Down Expand Up @@ -128,7 +131,7 @@ def __init__(self, env_params, sumo_params, scenario):
self.setup_initial_state()

def restart_sumo(self, sumo_params, sumo_binary=None):
"""Restarts an already initialized sumo instance.
"""Restart an already initialized sumo instance.

This is used when visualizing a rollout, in order to update the
sumo_binary with potentially a gui and export emission data from sumo.
Expand Down Expand Up @@ -157,7 +160,7 @@ def restart_sumo(self, sumo_params, sumo_binary=None):
self.setup_initial_state()

def start_sumo(self):
"""Starts a sumo instance.
"""Start a sumo instance.

Uses the configuration files created by the generator class to
initialize a sumo instance. Also initializes a traci connection to
Expand Down Expand Up @@ -254,11 +257,11 @@ def start_sumo(self):
raise error

def setup_initial_state(self):
"""Returns information on the initial state of the vehicles in the
network, to be used upon reset.
"""Return information on the initial state of vehicles in the network.

Also adds initial state information to the self.vehicles class and
starts a subscription with sumo to collect state information each step.
This information is to be used upon reset. This method also adds this
information to the self.vehicles class and starts a subscription with
sumo to collect state information each step.

Returns
-------
Expand Down Expand Up @@ -348,7 +351,7 @@ def setup_initial_state(self):
self.vehicles.update(vehicle_obs, id_lists, self)

def step(self, rl_actions):
"""Advances the environment by one step.
"""Advance the environment by one step.

Assigns actions to autonomous and human-driven agents (i.e. vehicles,
traffic lights, etc...). Actions that are not assigned are left to the
Expand Down Expand Up @@ -460,7 +463,7 @@ def step(self, rl_actions):
return next_observation, reward, crash, {}

def reset(self):
"""Resets the environment.
"""Reset the environment.

This method is performed in between rollouts. It resets the state of
the environment, and re-initializes the vehicles in their starting
Expand Down Expand Up @@ -619,7 +622,7 @@ def additional_command(self):
pass

def apply_rl_actions(self, rl_actions=None):
"""Specifies the actions to be performed by the rl agent(s).
"""Specify the actions to be performed by the rl agent(s).

If no actions are provided at any given step, the rl agents default to
performing actions specified by sumo.
Expand All @@ -646,7 +649,7 @@ def _apply_rl_actions(self, rl_actions):
raise NotImplementedError

def apply_acceleration(self, veh_ids, acc):
"""Applies the acceleration requested by a vehicle in sumo.
"""Apply the acceleration requested by a vehicle in sumo.

Note that, if the sumo-specified speed mode of the vehicle is not
"aggressive", the acceleration may be clipped by some safety velocity
Expand All @@ -666,8 +669,12 @@ def apply_acceleration(self, veh_ids, acc):
self.traci_connection.vehicle.slowDown(vid, next_vel, 1)

def apply_lane_change(self, veh_ids, direction):
"""Applies an instantaneous lane-change to a set of vehicles, while
preventing vehicles from moving to lanes that do not exist.
"""Apply an instantaneous lane-change to a set of vehicles.

This method also prevents vehicles from moving to lanes that do not
exist, and sets the "last_lc" variable for RL vehicles that performed a
lane change to match the current time step, in order to assist in
maintaining a lane change duration for these vehicles.

Parameters
----------
Expand Down Expand Up @@ -711,7 +718,7 @@ def apply_lane_change(self, veh_ids, direction):
self.vehicles.get_state(veh_id, "last_lc")

def choose_routes(self, veh_ids, route_choices):
"""Updates the route choice of vehicles in the network.
"""Update the route choice of vehicles in the network.

Parameters
----------
Expand All @@ -728,8 +735,11 @@ def choose_routes(self, veh_ids, route_choices):
vehID=veh_id, edgeList=route_choices[i])

def get_x_by_id(self, veh_id):
"""Provides a 1-dimensional representation of the position of a vehicle
in the network.
"""Provide a 1-D representation of the position of a vehicle.

Note: These values are only meaningful if the specify_edge_starts
method in the scenario is set appropriately; otherwise, a value of 0 is
returned for all vehicles.

Parameters
----------
Expand All @@ -748,7 +758,7 @@ def get_x_by_id(self, veh_id):
self.vehicles.get_edge(veh_id), self.vehicles.get_position(veh_id))

def sort_by_position(self):
"""Sorts the vehicle ids of vehicles in the network by position.
"""Sort the vehicle ids of vehicles in the network by position.

The base environment does this by sorting vehicles by their absolute
position.
Expand All @@ -771,7 +781,7 @@ def sort_by_position(self):
return self.vehicles.get_ids(), None

def update_vehicle_colors(self):
"""Modifies the color of vehicles if rendering is active.
"""Modify the color of vehicles if rendering is active.

The colors of all vehicles are updated as follows:
- red: autonomous (rl) vehicles
Expand Down Expand Up @@ -809,7 +819,7 @@ def update_vehicle_colors(self):
self.vehicles.remove_observed(veh_id)

def get_state(self):
"""Returns the state of the simulation as perceived by the RL agent.
"""Return the state of the simulation as perceived by the RL agent.

MUST BE implemented in new environments.

Expand All @@ -823,8 +833,7 @@ def get_state(self):

@property
def action_space(self):
"""Identifies the dimensions and bounds of the action space (needed for
gym environments).
"""Identify the dimensions and bounds of the action space.

MUST BE implemented in new environments.

Expand All @@ -837,8 +846,7 @@ def action_space(self):

@property
def observation_space(self):
"""Identifies the dimensions and bounds of the observation space
(needed for gym environments).
"""Identify the dimensions and bounds of the observation space.

MUST BE implemented in new environments.

Expand Down Expand Up @@ -873,7 +881,7 @@ def compute_reward(self, state, rl_actions, **kwargs):
return 0

def terminate(self):
"""Closes the TraCI I/O connection.
"""Close the TraCI I/O connection.

Should be done at end of every experiment. Must be in Env because the
environment opens the TraCI connection.
Expand All @@ -885,6 +893,7 @@ def _close(self):
self.scenario.close()

def teardown_sumo(self):
"""Kill the sumo subprocess instance."""
try:
os.killpg(self.sumo_proc.pid, signal.SIGTERM)
except Exception:
Expand All @@ -894,4 +903,5 @@ def _seed(self, seed=None):
return []

def render(self, mode='human'):
"""See parent class (gym.Env)."""
pass
3 changes: 0 additions & 3 deletions flow/envs/bay_bridge/__init__.py

This file was deleted.

3 changes: 3 additions & 0 deletions flow/envs/bay_bridge/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,15 @@ def apply_toll_bridge_control(self):

# TODO: decide on a good reward function
def compute_reward(self, state, rl_actions, **kwargs):
"""See class definition."""
return np.mean(self.vehicles.get_speed(self.vehicles.get_ids()))

""" The below methods need to be updated by child classes. """

def _apply_rl_actions(self, rl_actions):
"""Implemented by child classes."""
pass

def get_state(self):
"""Implemented by child classes."""
return []
20 changes: 18 additions & 2 deletions flow/envs/bottleneck_env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
"""
Environments for training vehicles to reduce capacity drops in a bottleneck.

This environment was used in:
TODO(ak): add paper after it has been published.
"""

from flow.controllers.rlcontroller import RLController
from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.routing_controllers import ContinuousRouter
Expand Down Expand Up @@ -347,6 +354,7 @@ def get_avg_bottleneck_velocity(self):
# Dummy action and observation spaces
@property
def action_space(self):
"""See class definition."""
return Box(
low=-float("inf"),
high=float("inf"),
Expand All @@ -355,6 +363,7 @@ def action_space(self):

@property
def observation_space(self):
"""See class definition."""
return Box(
low=-float("inf"),
high=float("inf"),
Expand All @@ -369,6 +378,7 @@ def compute_reward(self, state, rl_actions, **kwargs):
return reward

def get_state(self):
"""See class definition."""
return np.asarray([1])


Expand Down Expand Up @@ -411,6 +421,7 @@ def __init__(self, env_params, sumo_params, scenario):

@property
def observation_space(self):
"""See class definition."""
num_edges = len(self.scenario.get_edge_list())
num_rl_veh = self.num_rl
num_obs = 2 * num_edges + 4 * MAX_LANES * self.scaling \
Expand All @@ -419,6 +430,7 @@ def observation_space(self):
return Box(low=0, high=1, shape=(num_obs, ), dtype=np.float32)

def get_state(self):
"""See class definition."""
headway_scale = 1000

rl_ids = self.vehicles.get_rl_ids()
Expand Down Expand Up @@ -521,6 +533,7 @@ def get_state(self):
return np.concatenate((rl_obs, relative_obs, edge_obs))

def compute_reward(self, state, rl_actions, **kwargs):
"""See class definition."""
num_rl = self.vehicles.num_rl_vehicles
lane_change_acts = np.abs(np.round(rl_actions[1::2])[:num_rl])
return (rewards.desired_velocity(self) + rewards.rl_forward_progress(
Expand All @@ -536,7 +549,7 @@ def sort_by_position(self):

def _apply_rl_actions(self, actions):
"""
See parent class
See parent class.

Takes a tuple and applies a lane change or acceleration. If a lane
change is applied, don't issue any commands
Expand Down Expand Up @@ -711,6 +724,7 @@ def __init__(self, env_params, sumo_params, scenario):

@property
def observation_space(self):
"""See class definition."""
num_obs = 0
# density and velocity for rl and non-rl vehicles per segment
# Last element is the outflow
Expand All @@ -721,6 +735,7 @@ def observation_space(self):

@property
def action_space(self):
"""See class definition."""
if self.symmetric:
action_size = self.total_controlled_segments
else:
Expand All @@ -733,6 +748,7 @@ def action_space(self):
low=-1.5, high=1.0, shape=(int(action_size), ), dtype=np.float32)

def get_state(self):
"""See class definition."""
# action space is number of vehicles in each segment in each lane,
# number of rl vehicles in each segment in each lane
# mean speed in each segment, and mean rl speed in each
Expand Down Expand Up @@ -829,7 +845,7 @@ def _apply_rl_actions(self, rl_actions):
self.traci_connection.vehicle.setMaxSpeed(rl_id, 23.0)

def compute_reward(self, state, rl_actions, **kwargs):
""" Outflow rate over last ten seconds normalized to max of 1 """
"""Outflow rate over last ten seconds normalized to max of 1."""

if self.env_params.evaluate:
reward = self.vehicles.get_outflow_rate(500)
Expand Down
Loading