ray-project · pcmoritz · Mar 23, 2018 · Mar 21, 2018 · Mar 21, 2018 · Mar 21, 2018
@@ -300,9 +300,10 @@ in the ``config`` section of the experiments.
         'cartpole-ppo': {
             'run': 'PPO',
             'env': 'CartPole-v0',
-            'resources': {
-                'cpu': 2,
-                'driver_cpu_limit': 1},
+            'trial_resources': {
+                'cpu': 1,
+                'extra_cpu': 2,  # for workers
+            },
             'stop': {
                 'episode_reward_mean': 200,
                 'time_total_s': 180

@@ -31,7 +31,7 @@ dictionary.
     run_experiments({
         "my_experiment_name": {
             "run": "my_func",
-            "resources": { "cpu": 1, "gpu": 0 },
+            "trial_resources": { "cpu": 1, "gpu": 0 },
             "stop": { "mean_accuracy": 100 },
             "config": {
                 "alpha": grid_search([0.2, 0.4, 0.6]),
@@ -73,9 +73,9 @@ For more information on variant generation, see `variant_generator.py <https://g
 Resource Allocation
 -------------------
 
-Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded.
+Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``trial_resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded.
 
 If GPU resources are not requested, the ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access.
-Otherwise, it will be set to a GPU in the list (this is managed by Ray).
+Otherwise, it will be set to the list of GPUs allocated to the trial (this is managed by Ray).
 
-If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/trial.py>`__. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 5, "driver_gpu_limit": 1``.
+If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``extra_cpu`` or ``extra_gpu`` to reserve extra resource slots for the actors you will create. These extra slots are reserved in addition to the ``cpu`` / ``gpu`` used by the trainable itself. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 1, "extra_gpu": 4``.
diff --git a/examples/carla/a3c_lane_keep.py b/examples/carla/a3c_lane_keep.py
@@ -31,7 +31,7 @@
     "carla-a3c": {
         "run": "A3C",
         "env": "carla_env",
-        "resources": {"cpu": 4, "gpu": 1},
+        "trial_resources": {"cpu": 4, "gpu": 1},
         "config": {
             "env_config": env_config,
             "model": {

diff --git a/examples/carla/dqn_lane_keep.py b/examples/carla/dqn_lane_keep.py
@@ -31,7 +31,7 @@
     "carla-dqn": {
         "run": "DQN",
         "env": "carla_env",
-        "resources": {"cpu": 4, "gpu": 1},
+        "trial_resources": {"cpu": 4, "gpu": 1},
         "config": {
             "env_config": env_config,
             "model": {

diff --git a/examples/carla/ppo_lane_keep.py b/examples/carla/ppo_lane_keep.py
@@ -31,7 +31,7 @@
     "carla-ppo": {
         "run": "PPO",
         "env": "carla_env",
-        "resources": {"cpu": 4, "gpu": 1},
+        "trial_resources": {"cpu": 4, "gpu": 1},
         "config": {
             "env_config": env_config,
             "model": {

diff --git a/examples/carla/train_a3c.py b/examples/carla/train_a3c.py
@@ -32,7 +32,7 @@
     "carla-a3c": {
         "run": "A3C",
         "env": "carla_env",
-        "resources": {"cpu": 5, "gpu": 2, "driver_gpu_limit": 0},
+        "trial_resources": {"cpu": 5, "extra_gpu": 2},
         "config": {
             "env_config": env_config,
             "use_gpu_for_workers": True,

diff --git a/examples/carla/train_dqn.py b/examples/carla/train_dqn.py
@@ -29,7 +29,7 @@
     "carla-dqn": {
         "run": "DQN",
         "env": "carla_env",
-        "resources": {"cpu": 4, "gpu": 1},
+        "trial_resources": {"cpu": 4, "gpu": 1},
         "config": {
             "env_config": env_config,
             "model": {

diff --git a/examples/carla/train_ppo.py b/examples/carla/train_ppo.py
@@ -28,7 +28,7 @@
     "carla": {
         "run": "PPO",
         "env": "carla_env",
-        "resources": {"cpu": 4, "gpu": 1},
+        "trial_resources": {"cpu": 4, "gpu": 1},
         "config": {
             "env_config": env_config,
             "model": {

@@ -62,7 +62,7 @@
                 "run": args.run,
                 "checkpoint_freq": args.checkpoint_freq,
                 "local_dir": args.local_dir,
-                "resources": resources_to_json(args.resources),
+                "trial_resources": resources_to_json(args.trial_resources),
                 "stop": args.stop,
                 "config": dict(args.config, env=args.env),
                 "restore": args.restore,

@@ -4,9 +4,9 @@ cartpole-ppo:
     stop:
         episode_reward_mean: 200
         time_total_s: 180
-    resources:
-        cpu: 3
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 1
     config:
         num_workers: 2
         num_sgd_iter:

@@ -1,9 +1,8 @@
 hopper-ppo:
     env: Hopper-v1
     run: PPO
-    resources:
-        cpu: 65
+    trial_resources:
+        cpu: 1
         gpu: 4
-        driver_cpu_limit: 1
-        driver_gpu_limit: 4
+        extra_cpu: 64
     config: {"gamma": 0.995, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 160000, "num_workers": 64}
@@ -1,9 +1,9 @@
 humanoid-es:
     env: Humanoid-v1
     run: ES
-    resources:
-        cpu: 101
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 100
     stop:
         episode_reward_mean: 6000
     config:

@@ -3,9 +3,9 @@ humanoid-ppo-gae:
     run: PPO
     stop:
         episode_reward_mean: 6000
-    resources:
-        cpu: 65
+    trial_resources:
+        cpu: 1
         gpu: 4
-        driver_cpu_limit: 1
+        extra_cpu: 64
     config: {"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "write_logs": false}
 
@@ -3,8 +3,8 @@ humanoid-ppo:
     run: PPO
     stop:
        episode_reward_mean: 6000
-    resources:
-       cpu: 65
+    trial_resources:
+       cpu: 1
        gpu: 4
-       driver_cpu_limit: 1
+       extra_cpu: 64
     config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64, "model": {"free_log_std": true}, "use_gae": false}
@@ -5,9 +5,9 @@ cartpole-ppo:
     stop:
         episode_reward_mean: 200
         time_total_s: 180
-    resources:
-        cpu: 2
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 1
     config:
         num_workers: 1
         num_sgd_iter:

@@ -2,9 +2,9 @@
 pendulum-ppo:
     env: Pendulum-v0
     run: PPO
-    resources:
-        cpu: 5
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 4
     config:
         timesteps_per_batch: 2048
         num_workers: 4

@@ -1,9 +1,9 @@
 pong-a3c-pytorch-cnn:
     env: PongDeterministic-v4
     run: A3C
-    resources:
-        cpu: 17
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 16
     config:
         num_workers: 16
         batch_size: 20

@@ -1,9 +1,9 @@
 pong-a3c:
     env: PongDeterministic-v4
     run: A3C
-    resources:
-        cpu: 17
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
+        extra_cpu: 16
     config:
         num_workers: 16
         batch_size: 20

@@ -4,11 +4,11 @@
 pong-apex:
     env: PongNoFrameskip-v4
     run: APEX
-    resources:
-        cpu:
-            eval: 1 + spec.config.num_workers
-        driver_cpu_limit: 1
+    trial_resources:
+        cpu: 1
         gpu: 1
+        extra_cpu:
+            eval: 4 + spec.config.num_workers
     config:
         target_network_update_freq: 50000
         num_workers: 32

@@ -2,7 +2,7 @@
 pong-deterministic-dqn:
     env: PongDeterministic-v4
     run: DQN
-    resources:
+    trial_resources:
         cpu: 1
         gpu: 1
     stop:

@@ -8,10 +8,10 @@
 pong-deterministic-ppo:
     env: PongDeterministic-v4
     run: PPO
-    resources:
-        cpu: 5
+    trial_resources:
+        cpu: 1
         gpu: 1
-        driver_cpu_limit: 1
+        extra_cpu: 4
     stop:
         episode_reward_mean: 21
     config:

@@ -4,7 +4,7 @@ cartpole-a3c:
     stop:
         episode_reward_mean: 200
         time_total_s: 600
-    resources:
+    trial_resources:
         cpu: 2
     config:
         num_workers: 4

@@ -4,7 +4,7 @@ cartpole-dqn:
     stop:
         episode_reward_mean: 200
         time_total_s: 600
-    resources:
+    trial_resources:
         cpu: 1
     config:
         n_step: 3

@@ -4,7 +4,7 @@ cartpole-es:
     stop:
         episode_reward_mean: 200
         time_total_s: 300
-    resources:
+    trial_resources:
         cpu: 2
     config:
         num_workers: 2

@@ -4,7 +4,7 @@ cartpole-ppo:
     stop:
         episode_reward_mean: 200
         time_total_s: 300
-    resources:
+    trial_resources:
         cpu: 1
     config:
         num_workers: 1
@@ -1,8 +1,8 @@
 walker2d-v1-ppo:
     env: Walker2d-v1
     run: PPO
-    resources:
-        cpu: 65
+    trial_resources:
+        cpu: 1
         gpu: 4
-        driver_cpu_limit: 1
+        extra_cpu: 64
     config: {"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": .0001, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_workers": 64}
@@ -15,24 +15,34 @@ def json_to_resources(data):
     if type(data) is str:
         data = json.loads(data)
     for k in data:
+        if k in ["driver_cpu_limit", "driver_gpu_limit"]:
+            raise TuneError(
+                "The field `{}` is no longer supported. Use `extra_cpu` "
+                "or `extra_gpu` instead.".format(k))
         if k not in Resources._fields:
             raise TuneError(
                 "Unknown resource type {}, must be one of {}".format(
                     k, Resources._fields))
     return Resources(
         data.get("cpu", 1), data.get("gpu", 0),
-        data.get("driver_cpu_limit"), data.get("driver_gpu_limit"))
+        data.get("extra_cpu", 0), data.get("extra_gpu", 0))
 
 
 def resources_to_json(resources):
+    if resources is None:
+        resources = Resources(cpu=1, gpu=0)
     return {
         "cpu": resources.cpu,
         "gpu": resources.gpu,
-        "driver_cpu_limit": resources.driver_cpu_limit,
-        "driver_gpu_limit": resources.driver_gpu_limit,
+        "extra_cpu": resources.extra_cpu,
+        "extra_gpu": resources.extra_gpu,
     }
 
 
+def _tune_error(msg):
+    raise TuneError(msg)
+
+
 def make_parser(**kwargs):
     """Returns a base argument parser for the ray.tune tool."""
 
@@ -56,7 +66,12 @@ def make_parser(**kwargs):
         help="Algorithm-specific configuration (e.g. env, hyperparams), "
         "specified in JSON.")
     parser.add_argument(
-        "--resources", default='{"cpu": 1}', type=json_to_resources,
+        "--resources", help="Deprecated, use --trial-resources.",
+        type=lambda v: _tune_error(
+            "The `resources` argument is no longer supported. "
+            "Use `trial_resources` or --trial-resources instead."))
+    parser.add_argument(
+        "--trial-resources", default='{"cpu": 1}', type=json_to_resources,
         help="Machine resources to allocate per trial, e.g. "
         "'{\"cpu\": 64, \"gpu\": 8}'. Note that GPUs will not be assigned "
         "unless you specify them here.")

@@ -68,7 +68,7 @@ def _restore(self, checkpoint_path):
             "run": "my_class",
             "stop": {"training_iteration": 1 if args.smoke_test else 99999},
             "repeat": 20,
-            "resources": {"cpu": 1, "gpu": 0},
+            "trial_resources": {"cpu": 1, "gpu": 0},
             "config": {
                 "width": lambda spec: 10 + int(90 * random.random()),
                 "height": lambda spec: int(100 * random.random()),

@@ -79,7 +79,7 @@ def _restore(self, checkpoint_path):
             "run": "my_class",
             "stop": {"training_iteration": 2 if args.smoke_test else 99999},
             "repeat": 10,
-            "resources": {"cpu": 1, "gpu": 0},
+            "trial_resources": {"cpu": 1, "gpu": 0},
             "config": {
                 "factor_1": 4.0,
                 "factor_2": 1.0,

@@ -50,7 +50,7 @@ def explore(config):
             "run": "PPO",
             "env": "Humanoid-v1",
             "repeat": 8,
-            "resources": {"cpu": 4, "gpu": 1},
+            "trial_resources": {"cpu": 4, "gpu": 1},
             "config": {
                 "kl_coeff": 1.0,
                 "num_workers": 8,