-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[RLlib] Separate PPO torch regression test, and make it longer (#31892)
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
- Loading branch information
1 parent
f9fa0b2
commit 20bfcdd
Showing
3 changed files
with
38 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
36 changes: 36 additions & 0 deletions
36
...ase/rllib_tests/learning_tests/yaml_files/ppo/torch/ppo-breakoutnoframeskip-v5-torch.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
ppo-breakoutnoframeskip-v5: | ||
env: ALE/Breakout-v5 | ||
run: PPO | ||
# Minimum mean episode reward and total timesteps (both reached within the given time_total_s) required to pass this test. | ||
pass_criteria: | ||
episode_reward_mean: 50.0 | ||
timesteps_total: 7000000 | ||
stop: | ||
# This is double the time budget we use for the tf version of this test, because tf achieves 2x the throughput. | ||
time_total_s: 7200 | ||
config: | ||
# Make this env analogous to the old v4 + NoFrameskip setup. | ||
env_config: | ||
frameskip: 1 | ||
full_action_space: false | ||
repeat_action_probability: 0.0 | ||
lambda: 0.95 | ||
kl_coeff: 0.5 | ||
clip_rewards: True | ||
clip_param: 0.1 | ||
vf_clip_param: 10.0 | ||
entropy_coeff: 0.01 | ||
train_batch_size: 5000 | ||
rollout_fragment_length: auto | ||
sgd_minibatch_size: 500 | ||
num_sgd_iter: 10 | ||
num_workers: 30 | ||
num_envs_per_worker: 1 | ||
batch_mode: truncate_episodes | ||
observation_filter: NoFilter | ||
model: | ||
vf_share_layers: true | ||
num_gpus: 2 | ||
min_time_s_per_iteration: 30 | ||
lr: 0.0001 | ||
grad_clip: 100 |