From 2f4070492c53af96649089497baa47fa756eb46c Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:14:36 -0700
Subject: [PATCH 1/4] Add leaderboard utility.

---
 flow/utils/leaderboard/evaluate.py          | 199 ++++++++++++++++++++
 flow/utils/leaderboard/run.py               |  13 ++
 flow/utils/leaderboard/solution_template.py |  21 +++
 3 files changed, 233 insertions(+)
 create mode 100644 flow/utils/leaderboard/evaluate.py
 create mode 100644 flow/utils/leaderboard/run.py
 create mode 100644 flow/utils/leaderboard/solution_template.py

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
new file mode 100644
index 000000000..a9be31e3f
--- /dev/null
+++ b/flow/utils/leaderboard/evaluate.py
@@ -0,0 +1,199 @@
+from flow.core.experiment import SumoExperiment
+from flow.core.params import InitialConfig
+from flow.core.traffic_lights import TrafficLights
+from flow.utils.rllib import get_rllib_config, get_flow_params
+from flow.utils.registry import make_create_env
+
+from flow.benchmarks.grid0 import flow_params as grid0
+from flow.benchmarks.grid1 import flow_params as grid1
+from flow.benchmarks.bottleneck0 import flow_params as bottleneck0
+from flow.benchmarks.bottleneck1 import flow_params as bottleneck1
+from flow.benchmarks.bottleneck2 import flow_params as bottleneck2
+from flow.benchmarks.figureeight0 import flow_params as figureeight0
+from flow.benchmarks.figureeight1 import flow_params as figureeight1
+from flow.benchmarks.figureeight2 import flow_params as figureeight2
+from flow.benchmarks.merge0 import flow_params as merge0
+from flow.benchmarks.merge1 import flow_params as merge1
+from flow.benchmarks.merge2 import flow_params as merge2
+
+import ray
+from ray.rllib.agent import get_agent_class
+from ray.tune.registry import get_registry, register_env
+import numpy as np
+import joblib
+
+# number of simulations to execute when computing performance scores
+NUM_RUNS = 10
+
+# dictionary containing all available benchmarks and their meta-parameters
+AVAILABLE_BENCHMARKS = {"grid0": grid0,
+                        "grid1": grid1,
+                        "bottleneck0": bottleneck0,
+                        "bottleneck1": bottleneck1,
+                        "bottleneck2": bottleneck2,
+                        "figureeight0": figureeight0,
+                        "figureeight1": figureeight1,
+                        "figureeight2": figureeight2,
+                        "merge0": merge0,
+                        "merge1": merge1,
+                        "merge2": merge2}
+
+
+def evaluate_policy(benchmark, _get_actions, _get_states=None):
+    """Evaluates the performance of a controller on a predefined traffic
+    benchmark.
+
+    Parameters
+    ----------
+    benchmark : str
+        name of the benchmark; must be spelled exactly as it appears in
+        the flow/benchmarks folder, otherwise a ValueError is raised
+    _get_actions : method
+        the mapping from states to actions for the RL agent(s)
+    _get_states : method, optional
+        a mapping from the environment object in Flow to some state, which
+        overrides the get_state method of the environment. Note that the
+        same cannot be done for the actions.
+
+    Returns
+    -------
+    float
+        mean evaluation return of the benchmark over NUM_RUNS simulations
+    float
+        standard deviation of the evaluation return of the benchmark over
+        NUM_RUNS simulations
+
+    Raises
+    ------
+    ValueError
+        if the specified benchmark is not available
+    """
+    if benchmark not in AVAILABLE_BENCHMARKS:
+        raise ValueError("benchmark {} is not available. Check spelling?".
+                         format(benchmark))
+
+    # get the flow params from the benchmark
+    flow_params = AVAILABLE_BENCHMARKS[benchmark]
+
+    exp_tag = flow_params["exp_tag"]
+    sumo_params = flow_params["sumo"]
+    vehicles = flow_params["veh"]
+    env_params = flow_params["env"]
+    env_params.evaluate = True  # set to True to get evaluation returns
+    net_params = flow_params["net"]
+    initial_config = flow_params.get("initial", InitialConfig())
+    traffic_lights = flow_params.get("tls", TrafficLights())
+
+    # import the environment, scenario, and generator classes
+    module = __import__("flow.envs", fromlist=[flow_params["env_name"]])
+    env_class = getattr(module, flow_params["env_name"])
+    module = __import__("flow.scenarios", fromlist=[flow_params["scenario"]])
+    scenario_class = getattr(module, flow_params["scenario"])
+    module = __import__("flow.scenarios", fromlist=[flow_params["generator"]])
+    generator_class = getattr(module, flow_params["generator"])
+
+    # recreate the scenario and environment
+    scenario = scenario_class(name=exp_tag,
+                              generator_class=generator_class,
+                              vehicles=vehicles,
+                              net_params=net_params,
+                              initial_config=initial_config,
+                              traffic_lights=traffic_lights)
+
+    # make sure the get_state method of the environment is the one
+    # specified by the user
+    if _get_states is not None:
+        class _env_class(env_class):
+            def get_state(self):
+                return _get_states(self)
+
+        env_class = _env_class
+
+    env = env_class(env_params=env_params,
+                    sumo_params=sumo_params,
+                    scenario=scenario)
+
+    # create a SumoExperiment object; the "rl_actions" method passed to
+    # exp.run below is the one provided by the user. Note that the state
+    # may not be the one specified by the environment.
+    exp = SumoExperiment(env=env, scenario=scenario)
+
+    # run the experiment and return the reward
+    res = exp.run(num_runs=NUM_RUNS, num_steps=env.env_params.horizon,
+                  rl_actions=_get_actions)
+
+    return np.mean(res["returns"]), np.std(res["returns"])
+
+
+def get_compute_action_rllab(path_to_pkl):
+    """Collects the compute_action method from rllab's pkl files.
+
+    Parameters
+    ----------
+    path_to_pkl : str
+        pkl file created by rllab that contains the policy information
+
+    Returns
+    -------
+    method
+        the compute_action method from the algorithm along with the trained
+        parameters
+    """
+    # get the agent/policy
+    data = joblib.load(path_to_pkl)
+    agent = data['policy']
+
+    # restore the trained parameters
+    agent.restore()
+
+    # compute_action returns an action and an info_dict, so wrap it to
+    # return just the action
+    def compute_action(state):
+        return agent.compute_action(state)[0]
+
+    return compute_action
+
+
+def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
+    """Collects the compute_action method from RLlib's serialized files.
+
+    Parameters
+    ----------
+    path_to_dir : str
+        RLlib directory containing training results
+    checkpoint_num : int
+        checkpoint number / training iteration of the learned policy
+    alg : str
+        name of the RLlib algorithm that was used during the training
+        procedure
+
+    Returns
+    -------
+    method
+        the compute_action method from the algorithm along with the trained
+        parameters
+    """
+    # collect the configuration information from the RLlib checkpoint
+    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
+    config = get_rllib_config(result_dir)
+
+    # run on only one cpu for rendering purposes
+    ray.init(num_cpus=1)
+    config["num_workers"] = 1
+
+    # create and register a gym+rllib env
+    flow_params = get_flow_params(config)
+    create_env, env_name = make_create_env(params=flow_params, version=9999,
+                                           sumo_binary="sumo")
+    register_env(env_name, create_env)
+
+    # recreate the agent
+    agent_cls = get_agent_class(alg)
+    agent = agent_cls(env=env_name, registry=get_registry(), config=config)
+
+    # restore the trained parameters into the policy
+    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
+    agent._restore(checkpoint)
+
+    return agent.compute_action
diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
new file mode 100644
index 000000000..bc1b8fc8f
--- /dev/null
+++ b/flow/utils/leaderboard/run.py
@@ -0,0 +1,13 @@
+import sys
+from evaluate import evaluate_policy
+PATH = sys.argv[1]
+sys.path.append(PATH)
+from solution import BENCHMARK, get_actions, get_states
+
+# Evaluate the solution
+mean, stdev = evaluate_policy(benchmark=BENCHMARK,
+                              _get_actions=get_actions,
+                              _get_states=get_states)
+# Print results
+print(mean, stdev)
+
diff --git a/flow/utils/leaderboard/solution_template.py b/flow/utils/leaderboard/solution_template.py
new file mode 100644
index 000000000..ac394948c
--- /dev/null
+++ b/flow/utils/leaderboard/solution_template.py
@@ -0,0 +1,21 @@
+"""
+A solution should include:
+    1. the benchmark scenario;
+    2. a get_actions() method;
+    3. a get_states() method.
+"""
+
+# Specify the benchmark scenario below.
+BENCHMARK = "" # Benchmark name goes here...
+
+# Specify the get_actions() method below.
+def get_actions(state):
+    # get_actions() code goes here...
+
+    return
+
+# Specify the get_states() method below.
+def get_states(env,**kwargs):
+    # get_states() code goes here...
+
+    return

From e7b52580a38518116122e807402ddbb8f5780df4 Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:35:03 -0700
Subject: [PATCH 2/4] Fix E402 violation.

---
 flow/utils/leaderboard/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
index bc1b8fc8f..3723725b4 100644
--- a/flow/utils/leaderboard/run.py
+++ b/flow/utils/leaderboard/run.py
@@ -1,8 +1,8 @@
-import sys
+from solution import BENCHMARK, get_actions, get_states
 from evaluate import evaluate_policy
+import sys
 PATH = sys.argv[1]
 sys.path.append(PATH)
-from solution import BENCHMARK, get_actions, get_states
 
 # Evaluate the solution
 mean, stdev = evaluate_policy(benchmark=BENCHMARK,

From f6002f4da39b7ea93de07de2e88c53f35db643ab Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:45:45 -0700
Subject: [PATCH 3/4] Fix more PEP violations.
---
 flow/utils/leaderboard/evaluate.py          |  2 +-
 flow/utils/leaderboard/run.py               |  1 -
 flow/utils/leaderboard/solution_template.py | 10 +++++-----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
index a9be31e3f..42349ff27 100644
--- a/flow/utils/leaderboard/evaluate.py
+++ b/flow/utils/leaderboard/evaluate.py
@@ -109,7 +109,7 @@ def get_state(self):
                 return _get_states(self)
 
         env_class = _env_class
-    
+
     env = env_class(env_params=env_params,
                     sumo_params=sumo_params,
                     scenario=scenario)

diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
index 3723725b4..594d7e38e 100644
--- a/flow/utils/leaderboard/run.py
+++ b/flow/utils/leaderboard/run.py
@@ -10,4 +10,3 @@
                               _get_states=get_states)
 # Print results
 print(mean, stdev)
-

diff --git a/flow/utils/leaderboard/solution_template.py b/flow/utils/leaderboard/solution_template.py
index ac394948c..e3f33505c 100644
--- a/flow/utils/leaderboard/solution_template.py
+++ b/flow/utils/leaderboard/solution_template.py
@@ -6,16 +6,16 @@
 """
 
 # Specify the benchmark scenario below.
-BENCHMARK = "" # Benchmark name goes here...
+BENCHMARK = ""  # Benchmark name goes here...
+
 
 # Specify the get_actions() method below.
 def get_actions(state):
     # get_actions() code goes here...
-
     return
 
-# Specify the get_states() method below.
-def get_states(env,**kwargs):
-    # get_states() code goes here...
+# Specify the get_states() method below.
+def get_states(env, **kwargs):
+    # get_states() code goes here...
 
     return

From 8be63bfd33443ff42209606239b659c0705a075c Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:52:46 -0700
Subject: [PATCH 4/4] I hope this is the last PEP violation.

---
 flow/utils/leaderboard/evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
index 42349ff27..accc19ae0 100644
--- a/flow/utils/leaderboard/evaluate.py
+++ b/flow/utils/leaderboard/evaluate.py
@@ -109,7 +109,7 @@ def get_state(self):
                 return _get_states(self)
 
         env_class = _env_class
-    
+
     env = env_class(env_params=env_params,
                     sumo_params=sumo_params,
                     scenario=scenario)
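
For reference, a complete solution module following solution_template.py
might look as follows. This is a minimal sketch, not part of the patches:
the choice of "figureeight0", the zero-acceleration baseline, and the use
of env.vehicles to assemble an observation are illustrative assumptions,
and the action dimension must match the number of RL vehicles in the
chosen benchmark.

import numpy as np

# Any key of AVAILABLE_BENCHMARKS in evaluate.py is valid here;
# figureeight0 is assumed purely for illustration.
BENCHMARK = "figureeight0"


def get_actions(state):
    # A trivial baseline: command zero acceleration regardless of the
    # observed state. A real submission would instead query a trained
    # policy, e.g. one recovered via get_compute_action_rllib() or
    # get_compute_action_rllab() from evaluate.py.
    return np.zeros(1)


def get_states(env, **kwargs):
    # An illustrative observation: the speed of every vehicle currently
    # in the network. evaluate_policy() installs this function as the
    # environment's get_state method, so it must not call env.get_state()
    # itself (that would recurse).
    return np.array([env.vehicles.get_speed(veh_id)
                     for veh_id in env.vehicles.get_ids()])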
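
To score a solution, run.py is invoked with the directory containing
solution.py as its only argument, e.g. "python run.py /path/to/solution/".
One caveat: after PATCH 2 reorders the imports to satisfy E402, the line
"from solution import BENCHMARK, get_actions, get_states" executes before
"sys.path.append(PATH)", so the solution module must already be importable
when run.py starts (for instance, by launching run.py from the directory
that holds solution.py).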