Skip to content

Commit

Permalink
added file to start algo task; related to #16 ;
Browse files Browse the repository at this point in the history
  • Loading branch information
dimitry12 committed Nov 19, 2018
1 parent 2217c91 commit b44f7b9
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions RepeatCopy-PPO.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from sklearn.model_selection import GridSearchCV
import pandas as pd
import namesgenerator
from ppo import main
import json


default_hyperparameters = {
'VERSION': '2.1.0',
'ENV': 'RepeatCopy-v0',
'RANDOM_SEED': 42,
'RENDER': False,

'MLP_UNITS': 16,
'MLP_LAYERS': 1, # one shared hidden layer between V and P
'P_MLP_LAYERS': 0, # policy network is linear
'V_MLP_LAYERS': 1, # V network is "deep"

# the dimensionality for all these is: "MDP state transitions" (not observational frames)
'GRADIENT_LEARNING_BATCH_SIZE': 32,
'TRANSITIONS_IN_EXPERIENCE_BUFFER': 1024,
'HORIZON': 1024,
'TOTAL_ENV_STEPS': 1e5, # 2e7,

'EPOCHS_PER_UPDATE': 4,

'CLIP_RANGE': .2,
'GAMMA': .99,
'ADVANTAGE_LAMBDA': .97,
'VALUE_LAMBDA': .99,
'MAX_GRAD_NORM': .5,
'VALUE_LOSS_WEIGHT': .25,
'LR': 3e-4,
}


class RLEstimator():
params_default = default_hyperparameters
params_values = {}
score_ = None
_flushed_score = False
_random_name = ''

def __init__(self, *args, **kwargs):
self.set_params(**kwargs)

def get_params(self, *args, **kwargs):
return self.params_values

def set_params(self, *args, **kwargs):
for param, param_default_value in self.params_default.items():
if param in kwargs:
self.params_values[param] = kwargs[param]
elif param in self.params_values:
pass
else:
self.params_values[param] = param_default_value

def fit(self, X):
self._random_name = namesgenerator.get_random_name()
self.score_ = main(hparams=self.params_values,
random_name=self._random_name)
return self

def score(self, X):
if not self._flushed_score:
self._flushed_score = True
with open('score.log', 'a') as log:
log.write(json.dumps(
{'score': self.score_, 'random_name': self._random_name, **self.params_values}) + "\n")
return self.score_


if __name__ == '__main__':
# main(hparams=default_hyperparameters)
# exit()
INITS_PER_HYPERSET = 2
assert INITS_PER_HYPERSET % 2 == 0
rle = RLEstimator()
gs = GridSearchCV(rle, {
'RENDER': [False],
'RANDOM_SEEED': [False],
'GAMMA': [0.95, 0.99, 0.9],
'ADVANTAGE_LAMBDA': [0.8, 0.9, 0.95],
'VALUE_LAMBDA': [0.8, 0.9, 0.95],
'TRANSITIONS_IN_EXPERIENCE_BUFFER': [128, 256, 512, 1024],
'GRADIENT_LEARNING_BATCH_SIZE': [16, 32, 64, 128],
'TOTAL_ENV_STEPS': [10_000],
}, cv=INITS_PER_HYPERSET, n_jobs=-1, refit=False)
gs.fit([.0]*INITS_PER_HYPERSET)
print(pd.DataFrame(gs.cv_results_).filter(
regex='^(param_)|(mean_test_score)'))

0 comments on commit b44f7b9

Please sign in to comment.