forked from nuno-faria/tetris-ai
-
Notifications
You must be signed in to change notification settings - Fork 8
/
run_eval.py
75 lines (61 loc) · 2.58 KB
/
run_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
from typing import List
import cv2
from dqn_agent import DQNAgent
from tetris import Tetris
from run_train import AgentConf
from keras.engine.saving import load_model
def run_eval(dir_name: str, episodes: int = 100, render: bool = False) -> List[int]:
    """Evaluate a trained Tetris DQN model greedily and return per-episode scores.

    Loads the Keras model saved under ``logs/<dir_name>/model.hdf`` into a
    freshly constructed agent, plays ``episodes`` full games with exploration
    disabled, and prints each episode's final score.

    Args:
        dir_name: Name of the run directory inside ``logs/`` holding ``model.hdf``.
        episodes: Number of evaluation games to play.
        render: If True, render the board via the environment (OpenCV window).

    Returns:
        List of final scores, one per episode.
    """
    agent_conf = AgentConf()
    env = Tetris()
    # The agent is built with the training-time hyperparameters; only its
    # model weights and epsilon matter for evaluation.
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=agent_conf.n_neurons, activations=agent_conf.activations,
                     epsilon_stop_episode=agent_conf.epsilon_stop_episode, mem_size=agent_conf.mem_size,
                     discount=agent_conf.discount, replay_start_size=agent_conf.replay_start_size)
    log_dir = 'logs/' + dir_name
    agent.model = load_model(f'{log_dir}/model.hdf')
    agent.epsilon = 0  # greedy policy: no random exploration during evaluation

    scores = []
    for episode in range(episodes):
        env.reset()
        done = False
        while not done:
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())
            # Find the action whose resulting state the agent rated best.
            best_action = next(
                (action for action, state in next_states.items() if state == best_state),
                None)
            if best_action is None:
                # BUG FIX: previously best_action could remain None (TypeError
                # on subscript) if no state compared equal to best_state; fall
                # back to an arbitrary legal action instead of crashing.
                best_action = next(iter(next_states))
            _, done = env.hard_drop([best_action[0], 0], best_action[1], render=render)
        scores.append(env.score)
        # print results at the end of the episode
        print(f'episode {episode} => {env.score}')
    return scores
def enumerate_run_eval(episodes: int = 128, render: bool = False):
    """Evaluate the selected model directory and print max scores, best first.

    BUG FIX: the original computed the most-recent directory from
    ``os.listdir('logs')`` (which raised IndexError when ``logs/`` was empty)
    and then unconditionally overwrote the result with a hard-coded directory
    name — dead code. The hard-coded directory is the effective behavior and
    is kept; the unused, crash-prone computation is removed.

    To auto-select the newest run instead, use:
        dirs = sorted((d for d in os.listdir('logs')
                       if os.path.isdir(os.path.join('logs', d))),
                      reverse=True)[:1]
    """
    dirs = ['tetris-20190802-221032-ms25000-e1-ese2000-d0.99']
    max_scores = []
    for d in dirs:
        print(f"Evaluating dir '{d}'")
        scores = run_eval(d, episodes=episodes, render=render)
        max_scores.append((d, max(scores)))
    # Highest max score first.
    max_scores.sort(key=lambda t: t[1], reverse=True)
    for k, v in max_scores:
        print(f"{v}\t{k}")
# Script entry point: run a short rendered evaluation (16 episodes), then
# close any OpenCV windows opened by the renderer and exit cleanly.
if __name__ == "__main__":
    enumerate_run_eval(episodes=16, render=True)
    cv2.destroyAllWindows()
    exit(0)