[RLlib] Separate PPO torch regression test, and make it longer (#31892)

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
ray-project · Jan 28, 2023 · 20bfcdd · 20bfcdd
1 parent f9fa0b2
commit 20bfcdd
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 2 deletions.
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
@@ -3174,7 +3174,7 @@
 
   run:
     timeout: 18000
-    script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=tf
+    script: python learning_tests/run.py --yaml-sub-dir=ppo/tf --framework=tf
 
 
   alert: default
@@ -3197,7 +3197,7 @@
 
   run:
     timeout: 18000
-    script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=torch
+    script: python learning_tests/run.py --yaml-sub-dir=ppo/torch --framework=torch
 
 
   alert: default

diff --git a/...files/ppo/ppo-breakoutnoframeskip-v5.yaml → ...ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml b/...files/ppo/ppo-breakoutnoframeskip-v5.yaml → ...ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml
diff --git a/...ase/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml b/...ase/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml
@@ -0,0 +1,36 @@
+ppo-breakoutnoframeskip-v5:
+    env: ALE/Breakout-v5
+    run: PPO
+    # Minimum reward and total ts (in given time_total_s) to pass this test.
+    pass_criteria:
+        episode_reward_mean: 50.0
+        timesteps_total: 7000000
+    stop:
+        # This is double the time we use for tf because of 2x throughput there.
+        time_total_s: 7200
+    config:
+        # Make analogous to old v4 + NoFrameskip.
+        env_config:
+            frameskip: 1
+            full_action_space: false
+            repeat_action_probability: 0.0
+        lambda: 0.95
+        kl_coeff: 0.5
+        clip_rewards: True
+        clip_param: 0.1
+        vf_clip_param: 10.0
+        entropy_coeff: 0.01
+        train_batch_size: 5000
+        rollout_fragment_length: auto
+        sgd_minibatch_size: 500
+        num_sgd_iter: 10
+        num_workers: 30
+        num_envs_per_worker: 1
+        batch_mode: truncate_episodes
+        observation_filter: NoFilter
+        model:
+            vf_share_layers: true
+        num_gpus: 2
+        min_time_s_per_iteration: 30
+        lr: 0.0001
+        grad_clip: 100