-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgame.py
146 lines (125 loc) · 4.85 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
class Game:
"""
TicTacToe game class
"""
def __init__(self, size=5):
self.size = size
self.winning_inds = self.get_winning_inds()
def get_init_board(self):
"""
Returns:
startBoard: a representation of the board (ideally this is the form
that will be the input to your neural network)
"""
return np.array([[0]*self.size for x in range(self.size)])
def get_board_size(self):
"""
Returns:
(x,y): a tuple of board dimensions
"""
return self.size, self.size
def get_action_size(self):
"""
Returns:
actionSize: number of all possible actions
"""
return self.size**2
def get_next_state(self, board, player, action):
"""
Input:
board: current board
player: current player (1 or -1)
action: action taken by current player
Returns:
nextBoard: board after applying action
nextPlayer: player who plays in the next turn (should be -player)
"""
flat = board.flatten()
flat[action] = player
board = flat.reshape(self.size, self.size)
return board, -player
def get_valid_moves(self, board):
"""
Input:
board: current board
Returns:
validMoves: a binary vector of length self.getActionSize(), 1 for
moves that are valid from the current board,
0 for invalid moves
"""
flat = board.flatten()
return flat == 0
def check_game_ended(self, board, player):
"""
Input:
board: current board in canonical form
player: current player (1 or -1)
Returns:
r: 0 if game has not ended. 1 if player won, -1 if player lost,
small non-zero value for draw.
""" # todo make sure this is working right
winner = 0
for x in self.winning_inds:
vals = board.flatten()[x]
if all(vals == 1):
winner = 1
if all(vals == -1):
winner = -1
if sum(self.get_valid_moves(board)) == 0:
winner = 1e-8
return winner
def get_winning_inds(self):
sz = self.size
winning_inds = []
for i in range(sz):
winning_inds.append(range(i*sz, i*sz+sz)) # add all rows
winning_inds.append(range(i, sz**2)[::sz]) # add all columns
winning_inds.append(range(sz**2)[::sz+1]) # add diagonal from top left
winning_inds.append(range(sz-1, sz**2-1)[::sz-1]) # add diagonal from top right
return winning_inds
def get_canonical_board(self, board, player):
"""
Input:
board: current board
player: current player (1 or -1)
Returns:
canonicalBoard: returns canonical form of board. The canonical form
should be independent of player. For e.g. in chess,
the canonical form can be chosen to be from the pov
of white. When the player is white, we can return
board as is. When the player is black, we can invert
the colors and return the board.
"""
return board * player
def get_symmetries(self, board, pi):
"""
Input:
board: current board
pi: policy vector of size self.getActionSize()
Returns:
symmForms: a list of [(board,pi)] where each tuple is a symmetrical
form of the board and the corresponding pi vector. This
is used when training the neural network from examples.
"""
assert (len(pi) == self.size ** 2)
pi_board = np.reshape(pi, (self.size, self.size))
symmetrical_training_examples = []
for i in range(1, 5):
for j in [True, False]:
new_b = np.rot90(board, i) # flip the board 90 degrees 8 times
new_pi = np.rot90(pi_board, i) # do the same for p vector
if j: # half of the time we'll mirror the board (and vector)
new_b = np.fliplr(new_b)
new_pi = np.fliplr(new_pi)
symmetrical_training_examples += [(new_b, list(new_pi.ravel()))]
return symmetrical_training_examples # returns 8 symmetrical iterations for every game state
def string_rep(self, board):
"""
Input:
board: current board
Returns:
boardString: a quick conversion of board to a string format.
Required by MCTS for hashing.
"""
return str(board.flatten())[1:-1]