-
Notifications
You must be signed in to change notification settings - Fork 0
/
president.py
436 lines (332 loc) · 13.5 KB
/
president.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
import numpy as np
import time
from constants import ranks, rewards, statuses, deck, rev_ranks, rank_max
from utils import find_best, find_worst, game_reward
"""
This file contains the three main classes of the game :
- history : in order to keep the history of the game.
- player : the player that plays with an agent.
- game : the class that handles the game.
"""
class history:
    """ Class in order to keep the history of the game for each player """
    def __init__(self, n):
        """
        Constructor method : is initialized with the number of players.
        Parameters
        ----------
        - n : number of players in the game.
        """
        # History for each player : count of cards of each rank already played by him.
        # range (not the Python-2-only xrange) so the code runs on Python 2 and 3.
        self.players = [{card : 0 for card in ranks[0].keys()} for k in range(n)]
        # Remaining (unseen) cards in the game : 4 of each rank in a 52-card deck
        self.remaining_cards = {card : 4 for card in ranks[0].keys()}
        # Number of cards left for each player : when 52 is not divisible by n,
        # the first (52 % n) players receive one extra card.
        self.nb_cards_player = {player : 52 // n + (1 if player < 52 % n else 0)
                                for player in range(n)}
    def our_hand(self, hand):
        """
        Function that subtracts from the remaining cards (history) the cards of the player.
        Parameters
        ----------
        - hand : hand of the player, list of lists [['A',3], ['3',2]] for instance
        """
        # Our own cards are known, so they are no longer "remaining" in the game.
        for card in hand:
            self.remaining_cards[card[0]] -= card[1]
        return None
    def update(self, player, move):
        """
        Function that updates the history after a move.
        Parameters
        ----------
        - player : the player that played the move.
        - move : the card and number of cards played. move = ['A', 3] for instance.
        """
        # If the move does not mean passing (a count of 0 encodes a pass)
        if move[1] != 0:
            # Updating the cards played by the player
            self.players[player][move[0]] += move[1]
            # Updating the number of cards of the value played
            self.remaining_cards[move[0]] -= move[1]
            # Updating the cards left for the player
            self.nb_cards_player[player] -= move[1]
        return None
class player:
    """
    Class representing a player with a hand, and an agent to make decisions
    """
    def __init__(self, Agent, cards):
        """
        Constructor method.
        Parameters
        ----------
        - Agent : the decision-making agent used by this player.
        - cards : initial hand of the player (list of card ranks).
        """
        # Defining an agent for the player
        self.agent = Agent
        self.cards = cards
        # Status of the player (president, people ...)
        self.status = 'People'
        # is the player playing ? 0 while in the game, exit order once out
        self.out = 0
    def possible_moves(self, last, revolution=0, pass_=False):
        """
        Method that determines the possible moves for the player.
        Parameters
        ----------
        last : tuple containing (card_chosen, number_of_cards)
        revolution : binary variable
        pass_ : if the player has passed
        Returns
        -------
        List of (card, count) tuples ; (0, 0) is always present and means passing.
        """
        # If the agent is actually a real player : show the cards.
        # Checking the class name directly is more robust than parsing repr(agent),
        # which raised IndexError whenever the repr contained no '.' character.
        if self.agent.__class__.__name__ == "RealPlayer":
            print(self.cards)
        # Default possible move : passing
        possible_moves_ = [(0, 0)]
        # Only not playing option
        if pass_:
            return possible_moves_
        # If the player doesn't initiate the turn
        if last[1] != 0:
            # Get the value of the last card(s)
            v = ranks[revolution][last[0]]
            # Get the value of the cards in hand
            w = [ranks[revolution][card] for card in self.cards]
            # Get the playable cards (strictly higher values) with their cardinality
            L = {n: np.sum([n == value for value in w]) for n in range(1 + v, 1 + rank_max)}
            # If the player is trou or vice-trou, the equality right activates
            if self.status in statuses[3:]:
                L[v] = np.sum([v == value for value in w])
            # Adding the possible moves : play the same number of cards as the last play.
            # NOTE(review): rev_ranks is indexed with self.agent.revolution here while
            # ranks above uses the `revolution` argument — confirm both always agree.
            for n in L.keys():
                if L[n] >= last[1]:
                    possible_moves_.append((rev_ranks[self.agent.revolution][n], last[1]))
        # If the player initiates the turn
        else:
            # Get the value of the cards in hand
            w = [ranks[revolution][card] for card in self.cards]
            # Get the cardinality of the cards at hand
            L = {n: np.sum([n == value for value in w]) for n in set(w)}
            # Adding the possible moves : play any possible number of any card at hand
            for n in L.keys():
                for k in range(L[n]):
                    possible_moves_.append((rev_ranks[self.agent.revolution][n], 1 + k))
        return possible_moves_
    def play(self, move):
        """
        Method that removes the card(s) played from the hand.
        Parameters
        ----------
        move : move done, tuple (card, count). (0, 0) means passing.
        """
        if move == (0, 0):
            return None
        else:
            # Remove one card of the chosen rank per card played
            for k in range(move[1]):
                self.cards.remove(move[0])
            return None
    def choose(self, last, revolution, history, counter, pass_, heuristics):
        """
        Method that chooses an action according to the agent used.
        Parameters
        ----------
        last : tuple containing (card_chosen, number_of_cards).
        revolution : binary variable.
        history : historic of the game (class).
        counter : counter used by certain agents (Q-Learning, Bandits).
        pass_ : binary variable.
        heuristics : list of heuristic variables, one per player.
        """
        # Let the agent observe the state, then pick among the legal moves
        self.agent.updateState(last, self.cards, history, revolution, counter, heuristics)
        return self.agent.choose(self.possible_moves(last, revolution, pass_))
    def update(self, reward, last, history, revolution, counter, heuristics):
        """
        Method that updates the agent.
        Parameters
        ----------
        reward : the reward obtained.
        last : tuple containing (card_chosen, number_of_cards).
        revolution : binary variable.
        history : historic of the game (class).
        counter : counter used by certain agents (Q-Learning, Bandits).
        heuristics : list of heuristic variables, one per player.
        """
        # Storing the rewards obtained
        self.agent.rewards.append(reward)
        self.agent.update(reward, last, self.cards, history, revolution, self.possible_moves(last, revolution), counter, heuristics)
        return None
# Game class containing players each with their own agent they use to make decisions
class GAME:
    """ Class that handles the game, contains the players with their agents """
    def __init__(self, agents, number_player = 4, final = None, verbose = True):
        """
        Constructor method : is initialized with the number of players.
        Parameters
        ----------
        agents : list of agents, one per player.
        number_player : number of players in the game.
        final : rewards for each final ranking ; defaults to [18, 10, 0, -10, -18].
        verbose : whether to print the progress of the game.
        """
        # None sentinel instead of a mutable default argument (a shared default
        # list could be mutated across GAME instances).
        if final is None:
            final = [18, 10, 0, -10, -18]
        # The stack is empty
        self.last = (0, 0)
        # Setting the rewards
        self.final = game_reward(number_player, final)
        # Shuffling the cards
        np.random.shuffle(deck)
        # Counting the cards
        q = 52 // number_player
        r = 52 % number_player
        self.players = []
        # Creating the players and distributing the cards : when the deal is
        # uneven the first r players receive one extra card.
        if r == 0:
            self.players += [player(agents[k], list(deck[k * q: (k+1) * q])) for k in range(number_player)]
        else:
            self.players += [player(agents[k], list(deck[k * (q+1): (k+1) * (q+1)])) for k in range(r)]
            self.players += [player(agents[k], list(deck[r * (q+1) + k * q: r * (q+1) + (k+1) * q])) for k in range(number_player-r)]
        # Setting the order at which the players play.
        # list(...) is required : reset() assigns into this sequence by index,
        # and a Python 3 range object does not support item assignment.
        self.order = list(range(number_player))
        # No revolution at the begining of the game
        self.revolution = 0
        # The first player starts
        self.actual_player = 0
        # History of cards played
        self.history = history(number_player)
        # Counter of people that left the game
        self.counter = 0
        # Counting the passes for the initiative transfer
        self.passes = 0
        # The heuristics variables coded (list of variables, one per player)
        self.heuristics = [0 for i in range(number_player)]
        # Verbose (printing or not)
        self.verbose = verbose
    def reset(self):
        """
        Method that resets the game : re-deals the cards, performs the card
        exchanges between the highest and lowest ranked players, and rebuilds
        the playing order from the previous game's results.
        """
        # Resetting the stack and re-dealing the shuffled deck
        self.last = (0, 0)
        np.random.shuffle(deck)
        number_of_players = len(self.players)
        q = 52 // number_of_players
        r = 52 % number_of_players
        if r != 0:
            # First r players get q+1 cards, the rest get q
            for k in range(r):
                self.players[k].cards = list(deck[(q+1) * k: (q+1) * (k+1)])
            for k in range(number_of_players - r):
                self.players[r+k].cards = list(deck[(q+1) * r + q * k: (q+1) * r + q * (k+1)])
        else:
            for k in range(number_of_players):
                self.players[k].cards = list(deck[q * k: q * (k+1)])
        self.revolution = 0
        self.counter = 0
        # Doing the exchange of cards for the two highest and two lowest ranked
        ind = [0 for k in range(4)]
        exchanges = [0 for k in range(4)]
        # Choosing the cards to be exchanged : trou gives his 2 best cards,
        # vice-trou his best, president gives his 2 worst, vice-president his worst.
        for i in range(number_of_players):
            if self.players[i].status == 'Trou':
                ind[0] = i
                exchanges[0] = find_best(2, self.players[i].cards)
                if self.verbose :
                    print("Player "+str(i)+" is the trou.")
            if self.players[i].status == 'Vice-trou':
                ind[1] = i
                exchanges[1] = find_best(1, self.players[i].cards)
                if self.verbose :
                    print("Player "+str(i)+" is the vice-trou.")
            if self.players[i].status == 'Vice-president':
                ind[2] = i
                exchanges[2] = find_worst(1, self.players[i].cards)
                if self.verbose :
                    print("Player "+str(i)+" is the vice-president.")
            if self.players[i].status == 'President':
                ind[3] = i
                exchanges[3] = find_worst(2, self.players[i].cards)
                if self.verbose :
                    print("Player "+str(i)+" is the president.")
        # If we're not at the begining of the game, we perform the exchanges and set the order
        if exchanges[0] != 0:
            # Each of the 4 ranked players receives the cards given by his counterpart
            for i in range(4):
                self.players[ind[i]].cards += exchanges[3-i]
                for card in exchanges[i]:
                    self.players[ind[i]].cards.remove(card)
            # Rebuild the playing order from the exit order of the previous game
            for k in range(len(self.players)):
                self.order[self.players[k].out - 1] = number_of_players - 1 - k
            for k in range(len(self.players)):
                self.players[k].out = 0
        self.history = history(number_of_players)
        if self.verbose :
            time.sleep(0.01)
            print("New game starts.")
        return None
    def play_turn(self):
        """
        Method that plays the turn for each player
        """
        # This function is used to play a turn of each player
        actual_player = self.actual_player
        # Everyone still in the game has passed : the stack is cleared and
        # the initiative is transferred.
        if self.passes == len(self.players) - 1 - self.counter:
            self.last = (0, 0)
            self.passes = 0
        # We check here if the player is still in the game
        if self.players[self.order[actual_player]].out == 0:
            # Here each agent player chooses an action according to different parameters.
            # The pass_ flag forces a pass when the stack holds the highest rank.
            move = self.players[self.order[actual_player]].choose(self.last,
                                                                  self.revolution,
                                                                  self.history,
                                                                  self.counter,
                                                                  (self.last[0] == rev_ranks[self.revolution][rank_max]),
                                                                  self.heuristics)
            # Updating the heuristic
            self.heuristics[self.order[actual_player]] = move[1]
            # If the agent chooses to play, we update the stack and the history values
            if move[0] != 0:
                self.last = move
                self.history.update(self.order[actual_player], move)
                # Updating the hand
                self.players[self.order[actual_player]].play(move)
                self.passes = 0
                # Logging what the player has done
                if self.verbose :
                    print("Player "+str(self.order[actual_player])+" has thrown : " + str(move) + " and has " +
                          str(len(self.players[self.order[actual_player]].cards)) + " cards left.")
                    time.sleep(1)
                # Compute the reward
                reward = move[1] * rewards[self.revolution][move[0]]
                # Here we check if the agent/player has an empty hand
                if len(self.players[self.order[actual_player]].cards) == 0:
                    if self.verbose :
                        print("Player "+str(self.order[actual_player])+" is out.")
                    # Increasing the counter to see how many people have left the game
                    self.counter += 1
                    # Record the exit order and free the seat in the order table
                    self.players[self.order[actual_player]].out = self.counter
                    self.order[actual_player] = len(self.players) - self.players[self.order[actual_player]].out
                    # Updating the final reward
                    reward += self.final[self.counter]
                    # Now we update all the status according to the counter
                    if self.counter == 1:
                        self.players[self.order[actual_player]].status = 'President'
                    elif self.counter == 2:
                        self.players[self.order[actual_player]].status = 'Vice-president'
                    elif self.counter == len(self.players) - 1:
                        self.players[self.order[actual_player]].status = 'Vice-trou'
                    elif self.counter == len(self.players):
                        self.players[self.order[actual_player]].status = 'Trou'
                    else:
                        self.players[self.order[actual_player]].status = 'People'
                # Let's use some reinforcement learning and learn!
                self.players[self.order[actual_player]].update(reward, self.last, self.history, self.revolution, self.counter, self.heuristics)
            # This is the case where the Agent/player doesnt want to throw a card
            else:
                if self.verbose :
                    print("Player "+str(self.order[actual_player]) + " passes his turn and has "
                          + str(len(self.players[self.order[actual_player]].cards)) + " cards left.")
                self.passes += 1
        # Update who the player is (wrap around to the first seat)
        if actual_player + 1 == len(self.players):
            self.actual_player = 0
        else:
            self.actual_player += 1
        return None
    def play_game(self):
        """
        Method that plays the whole game : resets, then plays turns until
        every player has emptied his hand.
        """
        self.reset()
        while self.counter < len(self.players):
            self.play_turn()
        if self.verbose :
            for k in range(len(self.players)):
                print("Player " + str(k) + " has ended as the " + self.players[k].status + ".")
        return None