@@ -67,10 +67,11 @@ def main():
67
67
parser .add_argument ('--seed' , type = int , default = None )
68
68
parser .add_argument ('--outdir' , type = str , default = None )
69
69
parser .add_argument ('--use-sdl' , action = 'store_true' )
70
- parser .add_argument ('--t-max' , type = int , default = 5 )
70
+ parser .add_argument ('--t-max' , type = int , default = 20 )
71
71
parser .add_argument ('--beta' , type = float , default = 1e-2 )
72
72
parser .add_argument ('--profile' , action = 'store_true' )
73
73
parser .add_argument ('--steps' , type = int , default = 10 ** 7 )
74
+ parser .add_argument ('--lr' , type = float , default = 7e-4 )
74
75
parser .set_defaults (use_sdl = False )
75
76
args = parser .parse_args ()
76
77
@@ -99,14 +100,15 @@ def agent_func(process_idx):
99
100
np .random .uniform (- 3e-4 , 3e-4 , size = param .data .shape )
100
101
101
102
# opt = optimizers.RMSprop(lr=1e-3)
102
- opt = rmsprop_ones .RMSpropOnes (lr = 1e-3 , eps = 1e-2 , alpha = 0.999 )
103
+ opt = rmsprop_ones .RMSpropOnes (lr = 7e-4 , eps = 1e-2 , alpha = 0.99 )
103
104
# opt = rmsprop_ones.RMSpropOnes(lr=1e-4, eps=1e-1)
104
105
# opt = optimizers.RMSpropGraves(
105
106
# lr=2.5e-4, alpha=0.95, momentum=0.95, eps=1e-2)
106
107
model = chainer .ChainList (pi , v )
107
108
opt .setup (model )
108
- opt .add_hook (chainer .optimizer .GradientClipping (2 ))
109
- return a3c .A3C (model , opt , args .t_max , 0.99 , beta = args .beta , process_idx = process_idx , phi = phi )
109
+ opt .add_hook (chainer .optimizer .GradientClipping (40 ))
110
+ return a3c .A3C (model , opt , args .t_max , 0.99 , beta = args .beta ,
111
+ process_idx = process_idx , phi = phi )
110
112
111
113
def env_func(process_idx):
    """Return a newly constructed ``ale.ALE`` instance.

    The instance is built from ``args.rom`` with ``use_sdl`` taken from
    ``args.use_sdl``. ``process_idx`` is accepted but not used in the body.
    """
    environment = ale.ALE(args.rom, use_sdl=args.use_sdl)
    return environment
@@ -119,14 +121,17 @@ def run_func(process_idx, agent, env):
119
121
try :
120
122
for i in range (args .steps ):
121
123
124
+ agent .optimizer .lr = (args .steps - i ) / args .steps * args .lr
125
+
122
126
total_r += env .reward
123
127
episode_r += env .reward
124
128
125
129
action = agent .act (env .state , env .reward , env .is_terminal )
126
130
127
131
if env .is_terminal :
128
132
if process_idx == 0 :
129
- print ('{} i:{} episode_r:{}' .format (outdir , i , episode_r ))
133
+ print ('{} i:{} lr:{} episode_r:{}' .format (
134
+ outdir , i , agent .optimizer .lr , episode_r ))
130
135
with open (os .path .join (outdir , 'scores.txt' ), 'a+' ) as f :
131
136
print (i , episode_r , file = f )
132
137
if max_score == None or episode_r > max_score :
0 commit comments