diff --git a/examples/atari/README.md b/examples/atari/README.md index d89d9f217..d0154502d 100644 --- a/examples/atari/README.md +++ b/examples/atari/README.md @@ -102,10 +102,10 @@ One epoch here is equal to 100,000 env step, 100 epochs stand for 10M. | task | best reward | reward curve | parameters | | --------------------------- | ----------- | ------------------------------------- | ------------------------------------------------------------ | -| PongNoFrameskip-v4 | 20 | ![](results/ppo/Pong_rew.png) | `python3 atari_ppo.py --task "PongNoFrameskip-v4"` | -| BreakoutNoFrameskip-v4 | 442.1 | ![](results/ppo/Breakout_rew.png) | `python3 atari_ppo.py --task "BreakoutNoFrameskip-v4"` | -| EnduroNoFrameskip-v4 | 1386.4 | ![](results/ppo/Enduro_rew.png) | `python3 atari_ppo.py --task "EnduroNoFrameskip-v4"` | -| QbertNoFrameskip-v4 | 19585 | ![](results/ppo/Qbert_rew.png) | `python3 atari_ppo.py --task "QbertNoFrameskip-v4"` | -| MsPacmanNoFrameskip-v4 | 2319 | ![](results/ppo/MsPacman_rew.png) | `python3 atari_ppo.py --task "MsPacmanNoFrameskip-v4"` | -| SeaquestNoFrameskip-v4 | 1764 | ![](results/ppo/Seaquest_rew.png) | `python3 atari_ppo.py --task "SeaquestNoFrameskip-v4"` | -| SpaceInvadersNoFrameskip-v4 | 1184 | ![](results/ppo/SpaceInvaders_rew.png) | `python3 atari_ppo.py --task "SpaceInvadersNoFrameskip-v4"` | +| PongNoFrameskip-v4 | 20.1 | ![](results/ppo/Pong_rew.png) | `python3 atari_ppo.py --task "PongNoFrameskip-v4"` | +| BreakoutNoFrameskip-v4 | 438.5 | ![](results/ppo/Breakout_rew.png) | `python3 atari_ppo.py --task "BreakoutNoFrameskip-v4"` | +| EnduroNoFrameskip-v4 | 1304.8 | ![](results/ppo/Enduro_rew.png) | `python3 atari_ppo.py --task "EnduroNoFrameskip-v4"` | +| QbertNoFrameskip-v4 | 13640 | ![](results/ppo/Qbert_rew.png) | `python3 atari_ppo.py --task "QbertNoFrameskip-v4"` | +| MsPacmanNoFrameskip-v4 | 1930 | ![](results/ppo/MsPacman_rew.png) | `python3 atari_ppo.py --task "MsPacmanNoFrameskip-v4"` | +| SeaquestNoFrameskip-v4 | 904 | ![](results/ppo/Seaquest_rew.png) | `python3 atari_ppo.py --task "SeaquestNoFrameskip-v4" --lr 2.5e-5` | +| SpaceInvadersNoFrameskip-v4 | 843 | ![](results/ppo/SpaceInvaders_rew.png) | `python3 atari_ppo.py --task "SpaceInvadersNoFrameskip-v4"` | diff --git a/examples/atari/atari_ppo.py b/examples/atari/atari_ppo.py index 668d036fa..b123f1078 100644 --- a/examples/atari/atari_ppo.py +++ b/examples/atari/atari_ppo.py @@ -24,7 +24,7 @@ def get_args(): parser.add_argument('--seed', type=int, default=4213) parser.add_argument('--scale-obs', type=int, default=0) parser.add_argument('--buffer-size', type=int, default=100000) - parser.add_argument('--lr', type=float, default=1e-4) + parser.add_argument('--lr', type=float, default=5e-5) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--epoch', type=int, default=100) parser.add_argument('--step-per-epoch', type=int, default=100000) diff --git a/examples/atari/results/ppo/Breakout_rew.png b/examples/atari/results/ppo/Breakout_rew.png index 8625787d7..296bb3745 100644 Binary files a/examples/atari/results/ppo/Breakout_rew.png and b/examples/atari/results/ppo/Breakout_rew.png differ diff --git a/examples/atari/results/ppo/Enduro_rew.png b/examples/atari/results/ppo/Enduro_rew.png index 50a23fa76..b445ba061 100644 Binary files a/examples/atari/results/ppo/Enduro_rew.png and b/examples/atari/results/ppo/Enduro_rew.png differ diff --git a/examples/atari/results/ppo/MsPacman_rew.png b/examples/atari/results/ppo/MsPacman_rew.png index 34836550b..c16089d9c 100644 Binary files a/examples/atari/results/ppo/MsPacman_rew.png and b/examples/atari/results/ppo/MsPacman_rew.png differ diff --git a/examples/atari/results/ppo/Pong_rew.png b/examples/atari/results/ppo/Pong_rew.png index c52fdc202..62d05b278 100644 Binary files a/examples/atari/results/ppo/Pong_rew.png and b/examples/atari/results/ppo/Pong_rew.png differ diff --git a/examples/atari/results/ppo/Qbert_rew.png b/examples/atari/results/ppo/Qbert_rew.png index 03c83ddac..8db8b67ba 100644 Binary files a/examples/atari/results/ppo/Qbert_rew.png and b/examples/atari/results/ppo/Qbert_rew.png differ diff --git a/examples/atari/results/ppo/Seaquest_rew.png b/examples/atari/results/ppo/Seaquest_rew.png index 675013356..200a68eba 100644 Binary files a/examples/atari/results/ppo/Seaquest_rew.png and b/examples/atari/results/ppo/Seaquest_rew.png differ diff --git a/examples/atari/results/ppo/SpaceInvaders_rew.png b/examples/atari/results/ppo/SpaceInvaders_rew.png index 4c090a906..93a521e6b 100644 Binary files a/examples/atari/results/ppo/SpaceInvaders_rew.png and b/examples/atari/results/ppo/SpaceInvaders_rew.png differ