Add a --keep-checkpoints command-line option to python/ppo.py.

The option is declared in the docopt usage text (default 5), parsed to an
int as `keep_checkpoints`, and passed to tf.train.Saver as `max_to_keep`
in place of the previous argument-less Saver() call.

NOTE(review): this patch was recovered from a copy in which line breaks and
angle-bracket tokens had been stripped. The placeholder names (<rate>, <n>,
<env>) and the column spacing of the option lines are reconstructed --
confirm them against python/ppo.py before applying.

diff --git a/python/ppo.py b/python/ppo.py
index 66e036c4cb..2b688443fb 100755
--- a/python/ppo.py
+++ b/python/ppo.py
@@ -31,6 +31,7 @@
   --learning-rate=<rate>     Model learning rate [default: 3e-4].
   --hidden-units=<n>         Number of units in hidden layer [default: 64].
   --batch-size=<n>           How many experiences per gradient descent update step [default: 64].
+  --keep-checkpoints=<n>     How many model checkpoints to keep [default: 5].
 '''
 
 options = docopt(_USAGE)
@@ -45,6 +46,7 @@
 summary_freq = int(options['--summary-freq'])
 save_freq = int(options['--save-freq'])
 env_name = options['<env>']
+keep_checkpoints = int(options['--keep-checkpoints'])
 
 # Algorithm-specific parameters for tuning
 gamma = float(options['--gamma'])
@@ -79,7 +81,7 @@
     os.makedirs(summary_path)
 
 init = tf.global_variables_initializer()
-saver = tf.train.Saver()
+saver = tf.train.Saver(max_to_keep=keep_checkpoints)
 
 with tf.Session() as sess:
     # Instantiate model parameters