train.py
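"""Self-play training loop for the backgammon value network.

Each game, the side to move greedily picks the legal move whose resulting
position the network rates best for it; once the game ends, TD updates are
applied over the sequence of states White visited.
"""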
import Game
from net import Net

net = Net()
# net.load()
count = 0
wins = 0
while count < 1000:
    count += 1
    print("Game #: {}".format(count))
    g = Game.Game()
    # Both players roll; re-roll ties so one side wins the opening roll
    p1Roll = (0, 0)
    p2Roll = (0, 0)
    while sum(p1Roll) == sum(p2Roll):
        p1Roll = g.roll_dice()
        p2Roll = g.roll_dice()
    # print("White player rolled {}, Black player rolled {}".format(sum(p1Roll), sum(p2Roll)))
    if sum(p1Roll) > sum(p2Roll):
        print("White player gets the first turn...")
        g.turn = g.players[0]
        opening_roll = p1Roll
    else:
        print("Black player gets the first turn...")
        g.turn = g.players[1]
        opening_roll = p2Roll
    start = 1
    moves = 0
    states = []
    while not g.game_over():
        if start == 1:
            # The first turn is played with the roll that won the opening
            actions = g.find_moves(opening_roll, g.turn)
            start = 0
        else:
            actions = g.find_moves(g.roll_dice(), g.turn)
        if len(actions) > 0:
            values = []
            # Evaluate every legal action with the network (one-ply lookahead)
            for action in actions:
                g.take_action(g.turn, action)
                representation = g.get_representation(
                    g.board, g.players, g.on_bar, g.off_board, g.turn
                )
                values.append(net.getValue(representation))
                # Undo the action and try the rest
                g.undo_action(g.turn, action)
            # We want White to win, so White takes the move with the largest
            # output 0 and Black the move with the smallest output 1
            max_val = 0
            max_index = 0
            min_val = 1
            min_index = 0
            for i in range(len(values)):
                if g.turn == "white":
                    if max_val < values[i][0]:
                        max_val = values[i][0]
                        max_index = i
                elif g.turn == "black":
                    if min_val > values[i][1]:
                        min_val = values[i][1]
                        min_index = i
            if g.turn == "white":
                best_action = actions[max_index]
            else:
                best_action = actions[min_index]
            # Take the best action
            g.take_action(g.turn, best_action)
            # Get the representation of the resulting position
            expected_board = g.get_representation(
                g.board, g.players, g.on_bar, g.off_board, g.turn
            )
            if g.turn == "white":
                # Save the state for the TD updates after the game
                states.append(expected_board)
        # Swap turns and increment the move count
        moves += 1
        g.turn = g.get_opponent(g.turn)
    reward = 0
    if g.game_over():
        print("Game over in {} moves".format(moves))
        print("Num states:", len(states))
        print("{} won".format(g.find_winner()))
        if g.find_winner() == "white":
            # The terminal reward is recorded here but, as written, is not
            # fed back into the TD update below
            reward = 1
            wins += 1
        for i in range(len(g.board)):
            g.print_point(i)
        # Walk White's saved states and apply one TD update per transition;
        # the eligibility is based on states t and t+1, so there are
        # len(states) - 1 transitions in total
        for i in range(len(states) - 1):
            print("State:", i)
            current_state = states[i]
            predicted_state = states[i + 1]
            # TD error: change in the network's value estimate between
            # consecutive states
            error = net.getValue(predicted_state)[0] - net.getValue(current_state)[0]
            net.feedforward(current_state)
            net.do_td(current_state, net.getValue(current_state), error)
    print("Win percentage: {}".format(wins / count))
# net.save()
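# Usage (assuming Game.py and net.py sit alongside this script, as the
# imports above suggest):
#   python train.py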