From 20bfcdd7f834daa0a97a630613152280465ac537 Mon Sep 17 00:00:00 2001 From: Artur Niederfahrenhorst Date: Sat, 28 Jan 2023 09:04:34 -0800 Subject: [PATCH] [RLlib] Separate PPO torch regression test, and make it longer (#31892) Signed-off-by: Artur Niederfahrenhorst --- release/release_tests.yaml | 4 +-- .../ppo-breakoutnoframeskip-v5-tf.yaml} | 0 .../ppo-breakoutnoframeskip-v5-torch.yaml | 36 +++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) rename release/rllib_tests/learning_tests/yaml_files/ppo/{ppo-breakoutnoframeskip-v5.yaml => tf/ppo-breakoutnoframeskip-v5-tf.yaml} (100%) create mode 100644 release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml diff --git a/release/release_tests.yaml b/release/release_tests.yaml index e1db50e84d51..0560a7f47cfd 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -3174,7 +3174,7 @@ run: timeout: 18000 - script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=tf + script: python learning_tests/run.py --yaml-sub-dir=ppo/tf --framework=tf alert: default @@ -3197,7 +3197,7 @@ run: timeout: 18000 - script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=torch + script: python learning_tests/run.py --yaml-sub-dir=ppo/torch --framework=torch alert: default diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/ppo-breakoutnoframeskip-v5.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml similarity index 100% rename from release/rllib_tests/learning_tests/yaml_files/ppo/ppo-breakoutnoframeskip-v5.yaml rename to release/rllib_tests/learning_tests/yaml_files/ppo/tf/ppo-breakoutnoframeskip-v5-tf.yaml diff --git a/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml b/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml new file mode 100644 index 000000000000..3241d39d37ab --- /dev/null +++ 
b/release/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml @@ -0,0 +1,36 @@ +ppo-breakoutnoframeskip-v5: + env: ALE/Breakout-v5 + run: PPO + # Minimum reward and total ts (in given time_total_s) to pass this test. + pass_criteria: + episode_reward_mean: 50.0 + timesteps_total: 7000000 + stop: + # This is double the time we use for tf, because tf has 2x the throughput of torch. + time_total_s: 7200 + config: + # Make analogous to old v4 + NoFrameskip. + env_config: + frameskip: 1 + full_action_space: false + repeat_action_probability: 0.0 + lambda: 0.95 + kl_coeff: 0.5 + clip_rewards: True + clip_param: 0.1 + vf_clip_param: 10.0 + entropy_coeff: 0.01 + train_batch_size: 5000 + rollout_fragment_length: auto + sgd_minibatch_size: 500 + num_sgd_iter: 10 + num_workers: 30 + num_envs_per_worker: 1 + batch_mode: truncate_episodes + observation_filter: NoFilter + model: + vf_share_layers: true + num_gpus: 2 + min_time_s_per_iteration: 30 + lr: 0.0001 + grad_clip: 100