From 2f4070492c53af96649089497baa47fa756eb46c Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:14:36 -0700
Subject: [PATCH 1/4] Add leaderboard utility.

---
 flow/utils/leaderboard/evaluate.py          | 199 ++++++++++++++++++++
 flow/utils/leaderboard/run.py               |  13 ++
 flow/utils/leaderboard/solution_template.py |  21 +++
 3 files changed, 233 insertions(+)
 create mode 100644 flow/utils/leaderboard/evaluate.py
 create mode 100644 flow/utils/leaderboard/run.py
 create mode 100644 flow/utils/leaderboard/solution_template.py

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
new file mode 100644
index 000000000..a9be31e3f
--- /dev/null
+++ b/flow/utils/leaderboard/evaluate.py
@@ -0,0 +1,199 @@
+from flow.core.experiment import SumoExperiment
+from flow.core.params import InitialConfig
+from flow.core.traffic_lights import TrafficLights
+from flow.utils.rllib import get_rllib_config, get_flow_params
+from flow.utils.registry import make_create_env
+
+from flow.benchmarks.grid0 import flow_params as grid0
+from flow.benchmarks.grid1 import flow_params as grid1
+from flow.benchmarks.bottleneck0 import flow_params as bottleneck0
+from flow.benchmarks.bottleneck1 import flow_params as bottleneck1
+from flow.benchmarks.bottleneck2 import flow_params as bottleneck2
+from flow.benchmarks.figureeight0 import flow_params as figureeight0
+from flow.benchmarks.figureeight1 import flow_params as figureeight1
+from flow.benchmarks.figureeight2 import flow_params as figureeight2
+from flow.benchmarks.merge0 import flow_params as merge0
+from flow.benchmarks.merge1 import flow_params as merge1
+from flow.benchmarks.merge2 import flow_params as merge2
+
+import ray
+from ray.rllib.agent import get_agent_class
+from ray.tune.registry import get_registry, register_env
+import numpy as np
+import joblib
+
+# number of simulations to execute when computing performance scores
+NUM_RUNS = 10
+
+# dictionary containing all available benchmarks and their meta-parameters
+AVAILABLE_BENCHMARKS = {"grid0": grid0,
+                        "grid1": grid1,
+                        "bottleneck0": bottleneck0,
+                        "bottleneck1": bottleneck1,
+                        "bottleneck2": bottleneck2,
+                        "figureeight0": figureeight0,
+                        "figureeight1": figureeight1,
+                        "figureeight2": figureeight2,
+                        "merge0": merge0,
+                        "merge1": merge1,
+                        "merge2": merge2}
+
+
+def evaluate_policy(benchmark, _get_actions, _get_states=None):
+    """Evaluates the performance of a controller on a predefined traffic
+    benchmark.
+
+    Parameters
+    ----------
+    benchmark : str
+        name of the benchmark; must be spelled exactly as it appears in
+        the flow/benchmarks folder, otherwise a ValueError is raised
+    _get_actions : method
+        the mapping from states to actions for the RL agent(s)
+    _get_states : method, optional
+        a mapping from the environment object in Flow to some state, which
+        overrides the get_state method of the environment. Note that the
+        same cannot be done for the actions.
+
+    Returns
+    -------
+    float
+        mean evaluation return of the benchmark over NUM_RUNS simulations
+    float
+        standard deviation of the evaluation return of the benchmark over
+        NUM_RUNS simulations
+
+    Raises
+    ------
+    ValueError
+        if the specified benchmark is not available
+    """
+    if benchmark not in AVAILABLE_BENCHMARKS:
+        raise ValueError("benchmark {} is not available. Check spelling?".
+                         format(benchmark))
+
+    # get the flow params from the benchmark
+    flow_params = AVAILABLE_BENCHMARKS[benchmark]
+
+    exp_tag = flow_params["exp_tag"]
+    sumo_params = flow_params["sumo"]
+    vehicles = flow_params["veh"]
+    env_params = flow_params["env"]
+    env_params.evaluate = True  # set to True to get evaluation returns
+    net_params = flow_params["net"]
+    initial_config = flow_params.get("initial", InitialConfig())
+    traffic_lights = flow_params.get("tls", TrafficLights())
+
+    # import the environment, scenario, and generator classes
+    module = __import__("flow.envs", fromlist=[flow_params["env_name"]])
+    env_class = getattr(module, flow_params["env_name"])
+    module = __import__("flow.scenarios", fromlist=[flow_params["scenario"]])
+    scenario_class = getattr(module, flow_params["scenario"])
+    module = __import__("flow.scenarios", fromlist=[flow_params["generator"]])
+    generator_class = getattr(module, flow_params["generator"])
+
+    # recreate the scenario and environment
+    scenario = scenario_class(name=exp_tag,
+                              generator_class=generator_class,
+                              vehicles=vehicles,
+                              net_params=net_params,
+                              initial_config=initial_config,
+                              traffic_lights=traffic_lights)
+
+    # make sure the get_state method of the environment is the one
+    # specified by the user
+    if _get_states is not None:
+        class _env_class(env_class):
+            def get_state(self):
+                return _get_states(self)
+
+        env_class = _env_class
+
+    env = env_class(env_params=env_params,
+                    sumo_params=sumo_params,
+                    scenario=scenario)
+
+    # create a SumoExperiment object; the "rl_actions" method passed to
+    # exp.run below is the one provided by the user. Note that the state
+    # may not be the one specified by the environment.
+    exp = SumoExperiment(env=env, scenario=scenario)
+
+    # run the experiment and return the reward
+    res = exp.run(num_runs=NUM_RUNS, num_steps=env.env_params.horizon,
+                  rl_actions=_get_actions)
+
+    return np.mean(res["returns"]), np.std(res["returns"])
+
+
+def get_compute_action_rllab(path_to_pkl):
+    """Collects the compute_action method from rllab's pkl files.
+
+    Parameters
+    ----------
+    path_to_pkl : str
+        pkl file created by rllab that contains the policy information
+
+    Returns
+    -------
+    method
+        the compute_action method from the algorithm along with the trained
+        parameters
+    """
+    # get the agent/policy
+    data = joblib.load(path_to_pkl)
+    agent = data['policy']
+
+    # restore the trained parameters
+    agent.restore()
+
+    # compute_action returns an action and an info_dict, so wrap it to
+    # return just the action
+    def compute_action(state):
+        return agent.compute_action(state)[0]
+
+    return compute_action
+
+
+def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
+    """Collects the compute_action method from RLlib's serialized files.
+
+    Parameters
+    ----------
+    path_to_dir : str
+        RLlib directory containing training results
+    checkpoint_num : int
+        checkpoint number / training iteration of the learned policy
+    alg : str
+        name of the RLlib algorithm that was used during the training
+        procedure
+
+    Returns
+    -------
+    method
+        the compute_action method from the algorithm along with the trained
+        parameters
+    """
+    # collect the configuration information from the RLlib checkpoint
+    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
+    config = get_rllib_config(result_dir)
+
+    # run on only one cpu for rendering purposes
+    ray.init(num_cpus=1)
+    config["num_workers"] = 1
+
+    # create and register a gym+rllib env
+    flow_params = get_flow_params(config)
+    create_env, env_name = make_create_env(params=flow_params, version=9999,
+                                           sumo_binary="sumo")
+    register_env(env_name, create_env)
+
+    # recreate the agent
+    agent_cls = get_agent_class(alg)
+    agent = agent_cls(env=env_name, registry=get_registry(), config=config)
+
+    # restore the trained parameters into the policy
+    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
+    agent._restore(checkpoint)
+
+    return agent.compute_action
diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
new file mode 100644
index 000000000..bc1b8fc8f
--- /dev/null
+++ b/flow/utils/leaderboard/run.py
@@ -0,0 +1,13 @@
+import sys
+from evaluate import evaluate_policy
+PATH = sys.argv[1]
+sys.path.append(PATH)
+from solution import BENCHMARK, get_actions, get_states
+
+# Evaluate the solution
+mean, stdev = evaluate_policy(benchmark=BENCHMARK,
+                              _get_actions=get_actions,
+                              _get_states=get_states)
+# Print results
+print(mean, stdev)
+
diff --git a/flow/utils/leaderboard/solution_template.py b/flow/utils/leaderboard/solution_template.py
new file mode 100644
index 000000000..ac394948c
--- /dev/null
+++ b/flow/utils/leaderboard/solution_template.py
@@ -0,0 +1,21 @@
+"""
+A solution should include:
+    1. the benchmark scenario;
+    2. a get_actions() method;
+    3. a get_states() method.
+"""
+
+# Specify the benchmark scenario below.
+BENCHMARK = "" # Benchmark name goes here...
+
+# Specify the get_actions() method below.
+def get_actions(state):
+    # get_actions() code goes here...
+
+    return
+
+# Specify the get_states() method below.
+def get_states(env,**kwargs):
+    # get_states() code goes here...
+
+    return

From e7b52580a38518116122e807402ddbb8f5780df4 Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:35:03 -0700
Subject: [PATCH 2/4] Fix E402 violation.

---
 flow/utils/leaderboard/run.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
index bc1b8fc8f..3723725b4 100644
--- a/flow/utils/leaderboard/run.py
+++ b/flow/utils/leaderboard/run.py
@@ -1,8 +1,8 @@
-import sys
+from solution import BENCHMARK, get_actions, get_states
 from evaluate import evaluate_policy
+import sys
 PATH = sys.argv[1]
 sys.path.append(PATH)
-from solution import BENCHMARK, get_actions, get_states
 
 # Evaluate the solution
 mean, stdev = evaluate_policy(benchmark=BENCHMARK,

From f6002f4da39b7ea93de07de2e88c53f35db643ab Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:45:45 -0700
Subject: [PATCH 3/4] Fix more PEP violations.
---
 flow/utils/leaderboard/evaluate.py          |  2 +-
 flow/utils/leaderboard/run.py               |  1 -
 flow/utils/leaderboard/solution_template.py | 10 +++++-----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
index a9be31e3f..42349ff27 100644
--- a/flow/utils/leaderboard/evaluate.py
+++ b/flow/utils/leaderboard/evaluate.py
@@ -109,7 +109,7 @@ def get_state(self):
                 return _get_states(self)
 
         env_class = _env_class
-    
+
     env = env_class(env_params=env_params,
                     sumo_params=sumo_params,
                     scenario=scenario)

diff --git a/flow/utils/leaderboard/run.py b/flow/utils/leaderboard/run.py
index 3723725b4..594d7e38e 100644
--- a/flow/utils/leaderboard/run.py
+++ b/flow/utils/leaderboard/run.py
@@ -10,4 +10,3 @@
                               _get_states=get_states)
 # Print results
 print(mean, stdev)
-

diff --git a/flow/utils/leaderboard/solution_template.py b/flow/utils/leaderboard/solution_template.py
index ac394948c..e3f33505c 100644
--- a/flow/utils/leaderboard/solution_template.py
+++ b/flow/utils/leaderboard/solution_template.py
@@ -6,16 +6,16 @@
 """
 
 # Specify the benchmark scenario below.
-BENCHMARK = "" # Benchmark name goes here...
+BENCHMARK = ""  # Benchmark name goes here...
+
 
 # Specify the get_actions() method below.
 def get_actions(state):
     # get_actions() code goes here...
-
     return
 
-# Specify the get_states() method below.
-def get_states(env,**kwargs):
-    # get_states() code goes here...
+# Specify the get_states() method below.
+def get_states(env, **kwargs):
+    # get_states() code goes here...
 
     return

From 8be63bfd33443ff42209606239b659c0705a075c Mon Sep 17 00:00:00 2001
From: Fangyu Wu
Date: Wed, 22 Aug 2018 23:52:46 -0700
Subject: [PATCH 4/4] I hope this is the last PEP violation.

---
 flow/utils/leaderboard/evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flow/utils/leaderboard/evaluate.py b/flow/utils/leaderboard/evaluate.py
index 42349ff27..accc19ae0 100644
--- a/flow/utils/leaderboard/evaluate.py
+++ b/flow/utils/leaderboard/evaluate.py
@@ -109,7 +109,7 @@ def get_state(self):
                 return _get_states(self)
 
         env_class = _env_class
-    
+
     env = env_class(env_params=env_params,
                     sumo_params=sumo_params,
                     scenario=scenario)
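
For reference, a complete solution module following solution_template.py
might look as follows. This is a minimal sketch, not part of the patches:
the choice of "figureeight0", the zero-acceleration baseline, and the use
of env.vehicles to assemble an observation are illustrative assumptions,
and the action dimension must match the number of RL vehicles in the
chosen benchmark.

import numpy as np

# Any key of AVAILABLE_BENCHMARKS in evaluate.py is valid here;
# figureeight0 is assumed purely for illustration.
BENCHMARK = "figureeight0"


def get_actions(state):
    # A trivial baseline: command zero acceleration regardless of the
    # observed state. A real submission would instead query a trained
    # policy, e.g. one recovered via get_compute_action_rllib() or
    # get_compute_action_rllab() from evaluate.py.
    return np.zeros(1)


def get_states(env, **kwargs):
    # An illustrative observation: the speed of every vehicle currently
    # in the network. evaluate_policy() installs this function as the
    # environment's get_state method, so it must not call env.get_state()
    # itself (that would recurse).
    return np.array([env.vehicles.get_speed(veh_id)
                     for veh_id in env.vehicles.get_ids()])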
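
To score a solution, run.py is invoked with the directory containing
solution.py as its only argument, e.g. "python run.py /path/to/solution/".
One caveat: after PATCH 2 reorders the imports to satisfy E402, the line
"from solution import BENCHMARK, get_actions, get_states" executes before
"sys.path.append(PATH)", so the solution module must already be importable
when run.py starts (for instance, by launching run.py from the directory
that holds solution.py).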