-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresearch.py
82 lines (72 loc) · 2.38 KB
/
research.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from libs.othellogame import WHITE, BLACK
def state2img(board, player):
    """Encode the board as a list of two-element vectors from one player's perspective.

    Each playable cell is mapped to ``[1, 0]`` when it holds one of
    ``player``'s pieces, ``[0, 1]`` when it holds an opposing piece and
    ``[0, 0]`` when it is empty.

    Args:
        board: Indexable, sliceable sequence of piece codes (0 empty,
            1 white, -1 black). Cells are read from indices 11-18,
            21-28, ..., 81-88.
            NOTE(review): this looks like a 10-wide layout with a border
            around an 8x8 grid -- confirm against the game library.
        player: The side whose pieces count as "good"; compared to WHITE,
            any other value is treated as the opposite side.

    Returns:
        A list with one two-element vector per cell read (64 for a
        full-size board), in row order. Entries are references to the
        same three vector lists, not per-cell copies.
    """
    mine = [1, 0]
    theirs = [0, 1]
    empty = [0, 0]

    # Decide which colour is "mine" once, then build a single lookup table.
    white_vec, black_vec = (mine, theirs) if player == WHITE else (theirs, mine)

    # Indexed directly by piece code: 0 -> empty, 1 -> white, and -1 (black)
    # lands on the last slot through negative indexing.
    piece2vector = [empty, white_vec, black_vec]

    return [
        piece2vector[piece]
        for row_start in range(10, 90, 10)
        for piece in board[row_start + 1:row_start + 9]
    ]
import tensorflow as tf
from tensorflow import keras
def generate_model():
    """Build and compile the standard evaluation network for this Othello game.

    The network flattens an input of shape ``(64, 2)``, feeds it through a
    16-unit dense layer (no activation argument is given) and ends in a
    single linear output unit. It is compiled with the Adam optimizer and
    mean squared error as both loss and metric.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    layers = [
        keras.layers.Flatten(input_shape=(8 * 8, 2)),
        keras.layers.Dense(16),
        keras.layers.Dense(1, activation='linear'),
    ]
    network = keras.Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mean_squared_error'],
    )
    return network
from libs.util import ProgressBar
def generate_population(pop_size=16):
    """Create ``pop_size`` freshly generated models, reporting progress.

    Prints a start message, then calls ``generate_model()`` once per
    member and advances a ``ProgressBar`` after each one.

    Args:
        pop_size: Number of models to create (default 16).

    Returns:
        A list of the generated models, in creation order.
    """
    print(f"generating population with size {pop_size}...")
    progress = ProgressBar(pop_size)
    models = []
    for _ in range(pop_size):
        models.append(generate_model())
        progress.update()
    return models
''' SERIALIZATION OF DATA '''
def serialize_pop(population):
    """Collect the weights of every agent in the population.

    Args:
        population: Iterable of agents exposing ``get_weights()``.

    Returns:
        A list holding each agent's ``get_weights()`` result, in
        population order. Only weights are included.
    """
    serialized = []
    for member in population:
        serialized.append(member.get_weights())
    return serialized
def deserialize_pop(serialized_pop):
    """Rebuild a population of models from a list of serialized weights.

    Generates as many fresh models as there are entries in
    ``serialized_pop`` and loads the i-th entry into the i-th model with
    ``set_weights``, advancing a ``ProgressBar`` after each one.

    Args:
        serialized_pop: Sized iterable of weight sets, one per agent.

    Returns:
        The list of models with the given weights applied.
    """
    member_count = len(serialized_pop)
    models = generate_population(member_count)

    print("Deserializing pop data...")
    progress = ProgressBar(member_count)
    for model, stored_weights in zip(models, serialized_pop):
        model.set_weights(stored_weights)
        progress.update()
    return models
def calc_top_k(population, agent2score, p=0.2):
    """Return how many leading agents are needed to exceed a share of the total score.

    Walks ``population`` in the order given, accumulating each agent's
    signed score, and returns the 1-based count at which the running total
    first becomes strictly greater than ``p`` times the sum of the
    absolute scores.

    Args:
        population: Iterable of agents, each a key of ``agent2score``.
            NOTE(review): presumably ordered best-first -- confirm against
            the caller, since the result depends on the order.
        agent2score: Mapping from agent to its numeric score.
        p: Fraction (not a percentage) of the total absolute score that
            the leading agents must exceed (default 0.2).

    Returns:
        The number of leading agents whose cumulative score exceeds the
        threshold. If no prefix exceeds it (empty population, all-zero
        scores, ``p >= 1``, or negative scores pulling the total down),
        the full population size is returned instead of falling through
        with an implicit ``None``.

    Raises:
        KeyError: If an agent is missing from ``agent2score``.
    """
    # Look every score up once; both the threshold and the scan use them.
    ordered_costs = [agent2score[agent] for agent in population]
    limit = p * sum(map(abs, ordered_costs))

    running_total = 0
    for top_cut, score in enumerate(ordered_costs, start=1):
        running_total += score
        if running_total > limit:
            return top_cut

    # The threshold was never crossed: the whole population is required.
    return len(ordered_costs)