-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathai_test.py
165 lines (142 loc) · 5.52 KB
/
ai_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
from board_display import Board
from alphazero.train import TrainModel
from multiprocessing import Process, Queue, Manager
import random
def play_chess(q: Queue, q2: Queue):
    """
    Simulate games between two AlphaZero models with dynamic parameter adjustments.

    Two ``TrainModel`` instances are built from a shared base configuration
    with independently randomized ``c_puct`` and ``n_mcts_iters``. They then
    play a fixed series of games, Model 1 always moving first. Before every
    game the MCTS parameters of both models are shifted in opposite
    directions: the shift grows with the current streak of consecutive ties
    and is a small fixed negative step otherwise.

    Parameters
    ----------
    q : Queue
        Queue for sending move data to the display process. Each item is
        ``((px, py), player)`` where ``player`` is 0 for Model 1 and 1 for
        Model 2.
    q2 : Queue
        Queue for sending game results to the main process: ``0`` when
        Model 1 wins, ``1`` when Model 2 wins, ``-1`` otherwise.
    """
    # Base training configuration
    base_train_config = {
        'lr': 1e-2,
        'board_len': 9,
        'batch_size': 500,
        'is_use_gpu': True,
        'n_test_games': 10,
        'n_self_plays': 4000,
        'is_save_game': False,
        'n_feature_planes': 6,
        'check_frequency': 100,
        'start_train_size': 500
    }
    n_games = 10
    # Single source of truth for the board size used to decode actions.
    board_len = base_train_config['board_len']

    print("Initializing AlphaZero models with differences...")
    # Randomize initial settings for each model
    c_puct_1 = random.uniform(2.5, 4.0)
    c_puct_2 = random.uniform(2.5, 4.0)
    n_mcts_iters_1 = random.randint(400, 600)
    n_mcts_iters_2 = random.randint(400, 600)

    # Configure each model
    train_config_1 = base_train_config.copy()
    train_config_1.update({'c_puct': c_puct_1, 'n_mcts_iters': n_mcts_iters_1})
    train_config_2 = base_train_config.copy()
    train_config_2.update({'c_puct': c_puct_2, 'n_mcts_iters': n_mcts_iters_2})

    # Initialize models
    train_model_1 = TrainModel(**train_config_1)
    train_model_2 = TrainModel(**train_config_2)
    for model in (train_model_1, train_model_2):
        model.policy_value_net.eval()
    for model in (train_model_1, train_model_2):
        model.chess_board.clear_board()
    for model in (train_model_1, train_model_2):
        model.mcts.set_self_play(False)

    # Indexed by `player`: (model to move, model whose board must be synced,
    # label printed with the move).
    turn_table = (
        (train_model_1, train_model_2, "Model 1 (Black) move:"),
        (train_model_2, train_model_1, "Model 2 (White) move:"),
    )

    print("Drawing board...")
    win_model_1 = 0
    win_model_2 = 0
    # `tie_streak` counts consecutive ties and drives the parameter shift;
    # `total_ties` is the running total reported in the final summary.
    # Keeping them separate prevents a win from erasing earlier ties from
    # the summary.
    tie_streak = 0
    total_ties = 0
    print("Model 1 (Black) starts first")

    for i in range(n_games):
        player = 0  # Model 1 starts as Black
        train_model_1.chess_board.clear_board()
        train_model_2.chess_board.clear_board()
        is_over = False

        # Adjust parameters based on outcomes.
        # NOTE(review): this also runs before the first game, so the random
        # initial values are shifted once before any result exists.
        if tie_streak > 0:
            # Larger shift the longer the tie streak lasts
            c_puct_diff = tie_streak * 0.5
            iter_diff = tie_streak * 50
        else:
            # Small fixed step after a decisive game
            c_puct_diff = -0.25
            iter_diff = -25

        # Model 1 gets +diff, Model 2 gets -diff; both are clamped to bounds
        c_puct_1 = min(max(c_puct_1 + c_puct_diff, 1.5), 5.0)
        c_puct_2 = min(max(c_puct_2 - c_puct_diff, 1.5), 5.0)
        n_mcts_iters_1 = min(max(n_mcts_iters_1 + iter_diff, 200), 800)
        n_mcts_iters_2 = min(max(n_mcts_iters_2 - iter_diff, 200), 800)

        # Update MCTS parameters
        train_model_1.mcts.c_puct = c_puct_1
        train_model_1.mcts.n_iters = n_mcts_iters_1
        train_model_2.mcts.c_puct = c_puct_2
        train_model_2.mcts.n_iters = n_mcts_iters_2
        print(
            f"Game {i + 1}: Model 1 (c_puct={c_puct_1}, iters={n_mcts_iters_1}) vs Model 2 (c_puct={c_puct_2}, iters={n_mcts_iters_2})")

        while not is_over:
            mover, opponent, move_label = turn_table[player]
            # The mover plays on its own board; the same action is then
            # replayed on the opponent's board so both stay in sync.
            is_over, winner, action = mover.do_mcts_action(mover.mcts)
            opponent.chess_board.do_action(action)
            x, y = action // board_len, action % board_len
            print(move_label, x, y)
            # Grid cell -> display position (50 per cell, 75 offset;
            # presumably matching Board's layout — confirm in board_display).
            q.put(((y * 50 + 75, x * 50 + 75), player), block=False)
            player = 1 - player  # Hand the turn to the other model

        # Record results and reset for the next game
        if winner == 0:
            print(f"Game {i + 1}: Model 1 (Black) wins")
            q2.put(0)
            win_model_1 += 1
            tie_streak = 0  # A decisive game ends the tie streak
        elif winner == 1:
            print(f"Game {i + 1}: Model 2 (White) wins")
            q2.put(1)
            win_model_2 += 1
            tie_streak = 0
        else:
            print(f"Game {i + 1}: It's a tie")
            q2.put(-1)
            tie_streak += 1
            total_ties += 1

    print(f"Model 1 (Black) win rate: {win_model_1 / n_games * 100}%")
    print(f"Model 2 (White) win rate: {win_model_2 / n_games * 100}%")
    print(f"Number of ties: {total_ties}")
if __name__ == '__main__':
    # Two manager-backed queues shared with the worker process: one carries
    # individual moves to draw, the other carries one result code per
    # finished game.
    move_queue = Manager().Queue(maxsize=-1)
    result_queue = Manager().Queue(maxsize=-1)

    # Run the games in a separate process so this process can keep
    # servicing the display loop.
    worker = Process(target=play_chess, args=(move_queue, result_queue))
    worker.start()

    # Console message for each result code sent by play_chess.
    result_messages = {
        -1: "The game was a tie. Adjusting parameters for the next game...",
        0: "Model 1 won the game. Reducing parameter differences...",
        1: "Model 2 won the game. Reducing parameter differences...",
    }

    # Display the game board
    board = Board(9)
    while True:
        board.clock.tick(60)
        # NOTE(review): presumably polls for a window-close request —
        # confirm against board_display.Board.quit.
        board.quit()

        # A pending result means a game has finished: discard any moves
        # still queued, start a fresh board, then report the outcome.
        while not result_queue.empty():
            outcome = result_queue.get()
            while not move_queue.empty():
                move_queue.get()
            board.new_game()
            message = result_messages.get(outcome)
            if message is not None:
                print(message)

        # Draw moves as they come in
        while not move_queue.empty():
            pos, player = move_queue.get()
            board.draw_circle(pos=pos, player=player)

        board.update()