-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtictactoe.py
169 lines (137 loc) · 5.04 KB
/
tictactoe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import numpy as np
class tictactoe():
    """Two-player tic-tac-toe environment played on a 3x3 NumPy board.

    Board cells hold 0 (empty), 1 (first player) or -1 (second player).
    Actions are integers 0-8 addressing cells in row-major order, i.e.
    ``row = action // 3`` and ``col = action % 3``.

    Observations are float32 vectors of length 10: the flattened board
    followed by the mark (1 or -1) of the player whose turn it is.
    """

    # The eight winning lines as (row, col) triples, listed in the order
    # they are checked: rows, columns, main diagonal, anti-diagonal.
    _LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self):
        self.game_state = np.zeros((3, 3))  # 0 = empty, 1 / -1 = player marks
        self.turn = 1  # mark of the player to move next
        self.action_space_size = 9  # 9 places to play
        self.obs_space_size = 9 + 1  # 9 spaces plus player whose turn it is

    def _observation(self):
        """Return the flattened board plus the current turn as float32."""
        obs = np.append(self.game_state.flatten(), [self.turn])
        return obs.astype(np.float32)

    def reset(self):
        """Clear the board, give the move to player 1 and return the start.

        Returns:
            Tuple ``(obs, 0, False)``: initial observation, zero reward and
            a not-done flag.
        """
        self.game_state = np.zeros((3, 3))
        self.turn = 1
        return self._observation(), 0, False

    def step(self, action):
        """Place the current player's mark on square ``action`` (0-8).

        Returns:
            Tuple ``(obs, outcome, done)``. ``outcome`` is the winner's mark
            (1 or -1) when a line was completed and 0 otherwise; ``done`` is
            True on a win or a full board. ``obs`` already shows the *next*
            player's turn.

        Raises:
            ValueError: if ``action`` is out of range or the square is taken.
                The board is printed first to help debugging.
        """
        if not self.isLegalAction(action):
            print('Illegal action played')
            print(f"action: {action}")
            self.render()
            print("")
            raise ValueError("Illegal action")
        self.game_state[action // 3, action % 3] = self.turn  # put 1 or -1 in the specified spot
        self.turn = -self.turn  # other player's turn
        done, outcome = self.isGameOver()
        return self._observation(), outcome, done

    def render(self):
        """Print the raw 3x3 board array."""
        print(self.game_state)

    def isGameOver(self):
        """Check the board for a finished game.

        Returns:
            Tuple ``(done, outcome)``: ``(True, mark)`` when some line holds
            three equal non-zero marks, ``(False, 0)`` when nobody has won and
            an empty square remains, and ``(True, 0)`` for a full board
            without a winner.
        """
        board = self.game_state
        for first, second, third in self._LINES:
            mark = board[first]
            if mark != 0 and mark == board[second] and mark == board[third]:
                return True, mark
        # No winner: the game continues as long as a square is still open.
        if (board == 0).any():
            return False, 0
        # board was full, but nobody won
        return True, 0

    def isLegalAction(self, action):
        """Return True when ``action`` is in 0-8 and its square is empty."""
        if action < 0 or action > 8:
            return False
        return bool(self.game_state[action // 3, action % 3] == 0)

    def getLegalActionMask(self):
        """Return a length-9 float array with 1 for empty squares, else 0."""
        return (self.game_state.flatten() == 0).astype(np.float64)

    def copy(self):
        """Return an independent game with the same board and turn."""
        copy_game = tictactoe()
        copy_game.game_state = np.copy(self.game_state)
        copy_game.turn = self.turn
        return copy_game

    def get_computer_move(self):
        """Pick a move for the current player using simple heuristics.

        Based on the example heuristics found in
        https://inventwithpython.com/chapter10.html. Priority order:
        win immediately, block the opponent's immediate win, take the
        center, take a corner, take a side.

        Returns:
            The chosen action (0-8), or None when no square is free.
        """
        legal = [a for a in range(9) if self.isLegalAction(a)]

        # 1. check if the current player can win immediately
        for a in legal:
            trial = self.copy()
            _, reward, done = trial.step(a)
            if done and reward == self.turn:
                return a

        # 2. Check if the other player has a square where they could win,
        #    if so block them. The outcome is compared against the
        #    opponent's mark so that a mere draw is not mistaken for a threat.
        opponent = -self.turn
        for a in legal:
            trial = self.copy()
            trial.game_state[a // 3, a % 3] = opponent
            done, outcome = trial.isGameOver()
            if done and outcome == opponent:
                return a

        # 3. center, 4. corners, 5. sides
        for a in (4, 0, 2, 6, 8, 1, 3, 5, 7):
            if a in legal:
                return a
        return None
def test_tictactoe():
    """Smoke-test the environment by playing squares 0, 1, 2, ... in order.

    After the reset and after every move the board is rendered and the
    returned (obs, reward, done) triple is printed. At most eight moves are
    played; the run stops early as soon as a step reports the game as done.
    """
    game = tictactoe()

    def show(obs, reward, done):
        # Render the board, then echo what the environment returned.
        game.render()
        print(f"obs: {obs}, reward: {reward}, done: {done}")

    show(*game.reset())
    for square in range(8):
        result = game.step(square)
        show(*result)
        if result[2]:  # done flag
            return