Skip to content

Commit

Permalink
[RLlib] Separate PPO torch regression test, and make it longer (#31892)
Browse files Browse the repository at this point in the history
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
  • Loading branch information
ArturNiederfahrenhorst authored Jan 28, 2023
1 parent f9fa0b2 commit 20bfcdd
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
4 changes: 2 additions & 2 deletions release/release_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3174,7 +3174,7 @@

run:
timeout: 18000
script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=tf
script: python learning_tests/run.py --yaml-sub-dir=ppo/tf --framework=tf


alert: default
Expand All @@ -3197,7 +3197,7 @@

run:
timeout: 18000
script: python learning_tests/run.py --yaml-sub-dir=ppo --framework=torch
script: python learning_tests/run.py --yaml-sub-dir=ppo/torch --framework=torch


alert: default
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# PPO learning regression test on ALE/Breakout-v5.
ppo-breakoutnoframeskip-v5:
  env: ALE/Breakout-v5
  run: PPO
  # Minimum reward and total timesteps that must be reached within the
  # `stop.time_total_s` budget for this test to pass.
  pass_criteria:
    episode_reward_mean: 50.0
    timesteps_total: 7000000
  stop:
    # 7200 s = 2 h. This is double the time we use for tf because of the
    # 2x throughput there.
    time_total_s: 7200
  config:
    # Make the v5 env analogous to the old v4 + NoFrameskip setup.
    env_config:
      frameskip: 1
      full_action_space: false
      repeat_action_probability: 0.0
    lambda: 0.95
    kl_coeff: 0.5
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 10.0
    entropy_coeff: 0.01
    train_batch_size: 5000
    rollout_fragment_length: auto
    sgd_minibatch_size: 500
    num_sgd_iter: 10
    num_workers: 30
    num_envs_per_worker: 1
    batch_mode: truncate_episodes
    observation_filter: NoFilter
    model:
      vf_share_layers: true
    num_gpus: 2
    min_time_s_per_iteration: 30
    lr: 0.0001
    grad_clip: 100

0 comments on commit 20bfcdd

Please sign in to comment.