From bdf40c3add3b2b3a3b99e3d829e1d291f7f357b1 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 18 May 2020 11:16:00 -0700 Subject: [PATCH 1/2] Add missing config and make sure to use floats in example --- docs/Training-ML-Agents.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 4afdfa9f3d..aced98710e 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -204,6 +204,7 @@ BehaviorPPO: normalize: false num_layers: 2 time_horizon: 64 + summary_freq: 10000 vis_encoder_type: simple # PPO-specific configs @@ -239,7 +240,7 @@ BehaviorPPO: strength: 0.02 gamma: 0.99 encoding_size: 256 - learning_rate: 3e-4 + learning_rate: 3.0e-4 # GAIL gail: @@ -247,7 +248,7 @@ BehaviorPPO: gamma: 0.99 encoding_size: 128 demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo - learning_rate: 3e-4 + learning_rate: 3.0e-4 use_actions: false use_vail: false From 6a5d81b354d7c3c80d0d60343c128a8d733849b9 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 18 May 2020 11:26:15 -0700 Subject: [PATCH 2/2] Moved init_path --- docs/Training-ML-Agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index aced98710e..001939ec89 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -206,6 +206,7 @@ BehaviorPPO: time_horizon: 64 summary_freq: 10000 vis_encoder_type: simple + init_path: null # PPO-specific configs beta: 5.0e-3 @@ -227,7 +228,6 @@ BehaviorPPO: batch_size: 512 num_epoch: 3 samples_per_update: 0 - init_path: reward_signals: # environment reward