199 changes: 199 additions & 0 deletions flow/utils/leaderboard/evaluate.py
@@ -0,0 +1,199 @@
from flow.core.experiment import SumoExperiment
from flow.core.params import InitialConfig
from flow.core.traffic_lights import TrafficLights
from flow.utils.rllib import get_rllib_config, get_flow_params
from flow.utils.registry import make_create_env

from flow.benchmarks.grid0 import flow_params as grid0
from flow.benchmarks.grid1 import flow_params as grid1
from flow.benchmarks.bottleneck0 import flow_params as bottleneck0
from flow.benchmarks.bottleneck1 import flow_params as bottleneck1
from flow.benchmarks.bottleneck2 import flow_params as bottleneck2
from flow.benchmarks.figureeight0 import flow_params as figureeight0
from flow.benchmarks.figureeight1 import flow_params as figureeight1
from flow.benchmarks.figureeight2 import flow_params as figureeight2
from flow.benchmarks.merge0 import flow_params as merge0
from flow.benchmarks.merge1 import flow_params as merge1
from flow.benchmarks.merge2 import flow_params as merge2

import ray
from ray.rllib.agent import get_agent_class
from ray.tune.registry import get_registry, register_env
import numpy as np
import joblib

# number of simulations to execute when computing performance scores
NUM_RUNS = 10

# dictionary containing all available benchmarks and their meta-parameters
AVAILABLE_BENCHMARKS = {"grid0": grid0,
"grid1": grid1,
"bottleneck0": bottleneck0,
"bottleneck1": bottleneck1,
"bottleneck2": bottleneck2,
"figureeight0": figureeight0,
"figureeight1": figureeight1,
"figureeight2": figureeight2,
"merge0": merge0,
"merge1": merge1,
"merge2": merge2}


def evaluate_policy(benchmark, _get_actions, _get_states=None):
    """Evaluates the performance of a controller on a predefined traffic
    benchmark.

    Parameters
    ----------
    benchmark : str
        name of the benchmark; must match the module name of one of the
        benchmarks in the benchmarks folder, otherwise a ValueError is raised
    _get_actions : method
        the mapping from states to actions for the RL agent(s)
    _get_states : method, optional
        a mapping from the environment object in Flow to some state, which
        overrides the get_state method of the environment. Note that the
        same cannot be done for the actions.

    Returns
    -------
    float
        mean evaluation return of the benchmark over NUM_RUNS simulations
    float
        standard deviation of the evaluation return of the benchmark over
        NUM_RUNS simulations

    Raises
    ------
    ValueError
        if the specified benchmark is not available
    """
    if benchmark not in AVAILABLE_BENCHMARKS:
        raise ValueError("benchmark {} is not available. Check spelling?"
                         .format(benchmark))

    # get the flow params from the benchmark
    flow_params = AVAILABLE_BENCHMARKS[benchmark]

    exp_tag = flow_params["exp_tag"]
    sumo_params = flow_params["sumo"]
    vehicles = flow_params["veh"]
    env_params = flow_params["env"]
    env_params.evaluate = True  # set to True to collect evaluation returns
    net_params = flow_params["net"]
    initial_config = flow_params.get("initial", InitialConfig())
    traffic_lights = flow_params.get("tls", TrafficLights())

    # import the environment, scenario, and generator classes
    module = __import__("flow.envs", fromlist=[flow_params["env_name"]])
    env_class = getattr(module, flow_params["env_name"])
    module = __import__("flow.scenarios", fromlist=[flow_params["scenario"]])
    scenario_class = getattr(module, flow_params["scenario"])
    module = __import__("flow.scenarios", fromlist=[flow_params["generator"]])
    generator_class = getattr(module, flow_params["generator"])

    # recreate the scenario and environment
    scenario = scenario_class(name=exp_tag,
                              generator_class=generator_class,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config,
                              traffic_lights=traffic_lights)

    # make sure the get_state method of the environment is the one
    # specified by the user
    if _get_states is not None:
        class _env_class(env_class):
            def get_state(self):
                return _get_states(self)

        env_class = _env_class

    env = env_class(env_params=env_params,
                    sumo_params=sumo_params,
                    scenario=scenario)

    # create a SumoExperiment object whose "rl_actions" method is the one
    # provided by the user. Note that the states the agent sees may differ
    # from those specified by the environment if _get_states was provided.
    exp = SumoExperiment(env=env, scenario=scenario)

    # run the experiment and return the reward
    res = exp.run(num_runs=NUM_RUNS, num_steps=env.env_params.horizon,
                  rl_actions=_get_actions)

    return np.mean(res["returns"]), np.std(res["returns"])
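# Usage sketch (the policy below is purely hypothetical, for illustration
# only): a controller is scored by passing its state-to-action mapping, e.g.
#
#     mean, stdev = evaluate_policy("figureeight0",
#                                   _get_actions=lambda state: np.zeros(1))
#
# where the zero-acceleration lambda stands in for a trained policy.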


def get_compute_action_rllab(path_to_pkl):
    """Collects the compute_action method from rllab's pkl files.

    Parameters
    ----------
    path_to_pkl : str
        pkl file created by rllab that contains the policy information

    Returns
    -------
    method
        the compute_action method from the algorithm, along with the trained
        parameters
    """
    # get the agent/policy
    data = joblib.load(path_to_pkl)
    agent = data['policy']

    # restore the trained parameters
    agent.restore()

    # compute_action returns an action and an info_dict, so wrap it to
    # return only the action
    def compute_action(state):
        return agent.compute_action(state)[0]

    return compute_action
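# Usage sketch (the pkl path below is a placeholder): pair this helper with
# evaluate_policy to score a policy trained with rllab, e.g.
#
#     rl_actions = get_compute_action_rllab("/path/to/policy.pkl")
#     mean, stdev = evaluate_policy("merge0", _get_actions=rl_actions)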


def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
    """Collects the compute_action method from RLlib's serialized files.

    Parameters
    ----------
    path_to_dir : str
        RLlib directory containing training results
    checkpoint_num : int
        checkpoint number / training iteration of the learned policy
    alg : str
        name of the RLlib algorithm that was used during the training
        procedure

    Returns
    -------
    method
        the compute_action method from the algorithm, along with the trained
        parameters
    """
    # collect the configuration information from the RLlib checkpoint
    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
    config = get_rllib_config(result_dir)

    # run on only one CPU for rendering purposes
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    # create and register a gym+rllib env
    flow_params = get_flow_params(config)
    create_env, env_name = make_create_env(params=flow_params, version=9999,
                                           sumo_binary="sumo")
    register_env(env_name, create_env)

    # recreate the agent
    agent_cls = get_agent_class(alg)
    agent = agent_cls(env=env_name, registry=get_registry(), config=config)

    # restore the trained parameters into the policy
    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
    agent._restore(checkpoint)

    return agent.compute_action
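Taken together, the two helpers above recreate a trained agent and hand its
compute_action method to evaluate_policy. A minimal sketch, assuming a
hypothetical RLlib run (the results directory, checkpoint number, and
algorithm name below are all placeholders):

    from flow.utils.leaderboard.evaluate import (evaluate_policy,
                                                 get_compute_action_rllib)

    # placeholders: point these at a real RLlib results directory/checkpoint
    rl_actions = get_compute_action_rllib(
        path_to_dir="/ray_results/experiment", checkpoint_num=100, alg="PPO")
    mean, stdev = evaluate_policy("bottleneck0", _get_actions=rl_actions)
    print(mean, stdev)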
12 changes: 12 additions & 0 deletions flow/utils/leaderboard/run.py
@@ -0,0 +1,12 @@
import sys

# the solution directory must be on the path before solution can be imported
PATH = sys.argv[1]
sys.path.append(PATH)

from solution import BENCHMARK, get_actions, get_states
from evaluate import evaluate_policy

# Evaluate the solution
mean, stdev = evaluate_policy(benchmark=BENCHMARK,
                              _get_actions=get_actions,
                              _get_states=get_states)

# Print results
print(mean, stdev)
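With the path appended before the import, run.py can be pointed at any
directory containing a completed solution.py, e.g. python run.py
~/my_solution (the directory name is a placeholder); it then prints the mean
and standard deviation of the evaluation return over NUM_RUNS simulations.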
21 changes: 21 additions & 0 deletions flow/utils/leaderboard/solution_template.py
@@ -0,0 +1,21 @@
"""
A solution should include:
1. Benchmark scenario;
2. get_actions() method;
3. get_states() method.
"""

# Specify benchmark scenario below.
BENCHMARK = "" # Benchmark name goes here...


# Specify get_actions() method below.
def get_actions(state):
# get_actions() code goes here...
return


# Specify get_states() method below.
def get_states(env, **kwargs):
# get_states() code goes here...
return
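For reference, a completed solution might look like the sketch below. It is
purely illustrative: the benchmark choice and the constant-acceleration
controller are placeholders, and the calls in get_states
(env.vehicles.get_ids, env.vehicles.get_speed) are assumed from Flow's
vehicles class.

    import numpy as np

    BENCHMARK = "figureeight0"


    def get_actions(state):
        # hypothetical controller: apply a small constant acceleration
        return 0.5 * np.ones(1)


    def get_states(env, **kwargs):
        # hypothetical observation: the speed of every vehicle in the network
        ids = env.vehicles.get_ids()
        return np.array([env.vehicles.get_speed(v) for v in ids])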