#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# PROJET MAJEURE - Reinforcement Learning
# ERBACHER, Pierre
# FOLETTO PIMENTA, Pedro
# TIZON, Nicolas
import sys
import time
#import matplotlib.pyplot as plt
from PyQt5.QtWidgets import QWidget, QPushButton, QApplication
from PyQt5.QtGui import QPainter, QColor, QFont, QBrush, QPixmap
from PyQt5.QtCore import Qt, QTimer
from interface import *
from state import *
import numpy as np
import random as rd
import math
from os import system as osSystem
# WINDOW / GAME_AREA
## Renamed to start with WINDOW_, GAME_AREA_
# WARNING: These variables are duplicated in the other modules.
# Remember to update them there too, or use a shared variable.
WINDOW_WIDTH = 800
WINDOW_HEIGHT = 500
GAME_AREA_WIDTH = 500
GAME_AREA_HEIGHT = 500
# AGENT
## Renamed to start with AGENT_
AGENT_PV_INITIAL = 3
AGENT_WIDTH = 32
AGENT_HEIGHT = 32
AGENT_VITESSE = 10
NB_AGENTS_BLEUS = 4
NB_AGENTS_ROUGES = 2
#AGENT_DR = np.pi/30
AGENT_DR = 5  # in degrees
# PROJECTILE
## Renamed to start with PROJECTILE_
PROJECTILE_DAMAGE = 1
PROJECTILE_WIDTH = 8
PROJECTILE_HEIGHT = 8
PROJECTILE_VITESSE = 20
# RESOURCE
## Not renamed, just organized
RESOURCE_WIDTH = 24
RESOURCE_HEIGHT = 24
RESOURCE_REWARD = 400
NB_MAX_RESOURCES = 3
global startTime
startTime = 0
# TEAM
## Renamed to start with TEAM_
TEAM_BLUE = 1
TEAM_RED = 2
# IMAGEPATHS
IMAGEPATH_404 = "Images/ImageNotFound.png"
IMAGEPATH_AGENT_BLEU = "Images/AgentBleu.png"
IMAGEPATH_AGENT_ROUGE = "Images/AgentRouge.png"
IMAGEPATH_AGENT_NEUTRE = "Images/AgentNeutre.png"
IMAGEPATH_RESOURCE = "Images/Resource2.png"
IMAGEPATH_PROJECTILE_BLEU = "Images/Projectile_Bleu.png"
IMAGEPATH_PROJECTILE_ROUGE = "Images/Projectile_Rouge.png"
# Possible actions
## Renamed to start with ACTION_
"""
ACTION_STOP = 0
ACTION_MOVE = 1
ACTION_TRIGO = 2
ACTION_HORAIRE = 3
ACTION_SHOOT = 4
ACTION_BUILD = 5
"""
ACTION_STOP = 5
ACTION_MOVE = 0
ACTION_TRIGO = 1
ACTION_HORAIRE = 2
ACTION_SHOOT = 3
ACTION_BUILD = 8008135  # placeholder value; building is not implemented (see Agent.place_block) and it is excluded from ACTIONS
ACTIONS = [ACTION_MOVE,ACTION_TRIGO,ACTION_HORAIRE, ACTION_SHOOT]
# TYPE
## Renamed to start with TYPE_
TYPE_RESOURCE = 0
TYPE_AGENT = 1
TYPE_PROJECTILE = 2
TYPE_BLOCK = 3
# TRAINING
## Renamed to start with TRAINING_
TRAINING_N_EPISODE = 500
TRAINING_N_STEP = 750
TRAINING_GAMMA = 0.95
TRAINING_LEARNING_RATE = 0.01
#N_STATE = len([0:np.sqrt(H*H+W*W):])
global q_table_E1, q_table_E2
q_table_E1 = {}  # blue team
q_table_E2 = {}  # red team
eps = 0.05
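# eps is the exploration rate of the ε-greedy policy used during training
# (see markov_process below).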
global total_reward_bleu, total_reward_rouge
total_reward_bleu = []
total_reward_rouge = []
class Objet:
    # base class for every object in the environment
    x = 0  # default values
    y = 0
    dx = 0  # velocities
    dy = 0
    angle = 0
    width = 5  # collision box (object width and height)
    height = 5
    team = 0
    type_obj = 0
    pixmap = None
    def __init__(self, x=0, y=0, dx=0, dy=0, angle=0, team=0, type_obj=0):
        self.x = x
        self.y = y
        self.dx = dx
        self.dy = dy
        self.angle = angle
        self.team = team
        self.type_obj = type_obj
pixmapPath = IMAGEPATH_404
self.width = 32
self.height = 32
        if type_obj == TYPE_RESOURCE:
            # the object is a resource
            self.width = RESOURCE_WIDTH
            self.height = RESOURCE_HEIGHT
            pixmapPath = IMAGEPATH_RESOURCE
        elif type_obj == TYPE_AGENT:
            # the object is an agent
            self.width = AGENT_WIDTH
            self.height = AGENT_HEIGHT
            if team == TEAM_BLUE:
                # blue agent
                pixmapPath = IMAGEPATH_AGENT_BLEU
            elif team == TEAM_RED:
                # red agent
                pixmapPath = IMAGEPATH_AGENT_ROUGE
            else:
                # neutral agent
                pixmapPath = IMAGEPATH_AGENT_NEUTRE
        elif type_obj == TYPE_PROJECTILE:
            # the object is a projectile
            self.width = PROJECTILE_WIDTH
            self.height = PROJECTILE_HEIGHT
            if team == TEAM_BLUE:
                # blue projectile
                pixmapPath = IMAGEPATH_PROJECTILE_BLEU
            elif team == TEAM_RED:
                # red projectile
                pixmapPath = IMAGEPATH_PROJECTILE_ROUGE
self.pixmap = QPixmap()
self.pixmap.load(pixmapPath)
    def move(self):
        # move the agent or projectile according to its velocity
#self.x = min(max(0,self.x + self.dx), GAME_AREA_WIDTH - self.pixmap.width())
#self.y = min(max(0,self.y + self.dy), GAME_AREA_HEIGHT - self.pixmap.height())
self.x = max(0,self.x + self.dx)
self.y = max(0,self.y + self.dy)
if(self.x + self.width >= GAME_AREA_WIDTH):
self.x = GAME_AREA_WIDTH - self.width
if(self.y + self.height >=GAME_AREA_HEIGHT):
self.y = GAME_AREA_HEIGHT - self.height
    def collision(self, obj):
        # axis-aligned bounding-box overlap test between two objects:
        # they collide unless they are separated along the x-axis or the y-axis
        if ((self.x + self.width < obj.x or self.x > obj.x + obj.width)
                or (self.y + self.height < obj.y or self.y > obj.y + obj.height)):
            return False
        return True
    def draw(self, qp):
        # render the object
        ## Modified to include the object's rotation: the painter is rotated,
        ## so the sprite position is pre-multiplied by the inverse rotation
        r = np.deg2rad(self.angle)
        w = 0
        h = 0
if self.type_obj == TYPE_RESOURCE:
w = RESOURCE_WIDTH
h = RESOURCE_HEIGHT
elif self.type_obj == TYPE_AGENT:
w = AGENT_WIDTH
h = AGENT_HEIGHT
elif self.type_obj == TYPE_PROJECTILE:
w = PROJECTILE_WIDTH
h = PROJECTILE_HEIGHT
w2 = w/2
h2 = h/2
        x1 = (self.x + w2)*np.cos(r) + (self.y + h2)*np.sin(r) - w2
        y1 = -(self.x + w2)*np.sin(r) + (self.y + h2)*np.cos(r) - h2
        qp.rotate(self.angle)
        qp.drawPixmap(int(x1), int(y1), w, h, self.pixmap)  # QPainter expects integer pixel coordinates
        qp.rotate(-self.angle)
class Agent(Objet):
    # class for the agents
current_action = 0
aTire = False
aMange = False
estTouche = False
estMort = False
cibleTouchee = False
def __init__(self,x=0,y=0, angle = 0, team = 0):
dx = 0
dy = 0
        Objet.__init__(self, x, y, dx, dy, angle, team, TYPE_AGENT)
self.pv = AGENT_PV_INITIAL
self.reload = 0
self.reloadMax = 4
self.aMange = False
self.estTouche = False
self.estMort = False
self.cibleTouchee = False
self.pommesMangees = 0
self.victimes = 0
def shoot(self):
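        # fire only when the cooldown has expired; Game.update counts reload
        # down by one each step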
if self.reload == 0:
self.aTire = True
self.reload = self.reloadMax
projectile = Tir(self)
game.objectsList.append(projectile)
game.list_projectile.append(projectile)
    def takeDamage(self, p):
        self.pv = self.pv - p.dmg
        self.estTouche = True
        if self.pv <= 0:
            # dead once HP reaches zero (three hits at 1 damage each)
            self.spawn()
            self.estMort = True
            p.agent.victimes = p.agent.victimes + 1
def spawn(self):
        angle = rd.randint(0, 360)
        if self.team == 1:  # BLUE
            x = rd.randint(0, 100 - AGENT_WIDTH)
            y = rd.randint(0, GAME_AREA_HEIGHT - AGENT_HEIGHT)
        elif self.team == 2:  # RED
            x = rd.randint(GAME_AREA_WIDTH - 100, GAME_AREA_WIDTH - AGENT_WIDTH)
            y = rd.randint(0, GAME_AREA_HEIGHT - AGENT_HEIGHT)
        else:
            print("ERR: INCORRECT AGENT")
self.x = x
self.y = y
self.angle = angle
self.pv = AGENT_PV_INITIAL
    def execute_action(self, state):
        # perform the chosen action
        #self.current_action = self.take_action(state)  # choose the action
if (self.current_action == ACTION_STOP):
self.dy = 0
self.dx = 0
elif (self.current_action == ACTION_MOVE):
            self.dy = int(round(-AGENT_VITESSE*np.cos(np.deg2rad(self.angle))))  # update the next displacement
            self.dx = int(round(AGENT_VITESSE*np.sin(np.deg2rad(self.angle))))
elif (self.current_action == ACTION_TRIGO):
self.dy = 0
self.dx = 0
self.angle = self.angle - AGENT_DR
if (self.angle < -360):
self.angle = self.angle + 360
elif (self.current_action == ACTION_HORAIRE):
self.dy = 0
self.dx = 0
self.angle = self.angle + AGENT_DR
if (self.angle > 360):
self.angle = self.angle - 360
elif(self.current_action == ACTION_SHOOT):
self.dy = 0
self.dx = 0
self.shoot()
else:
print("ERR: INCORRECT ACTION BEING EXECUTED")
"""
    def take_action(self, state):
        # choose an action
        # TODO
        # temporary behavior:
rdm_prob = rd.uniform(0,1)
if (rdm_prob < 0.10): # 10%
return ACTION_MOVE
elif (rdm_prob < 0.40): #30%
return ACTION_TRIGO
elif (rdm_prob < 0.90): #50%
return ACTION_HORAIRE
elif (rdm_prob < 0.95): #5%
return ACTION_SHOOT
else : #5%
return ACTION_STOP
"""
def distanceWithObj(self,objet):
return np.sqrt((objet.x - self.x)*(objet.x - self.x) + (objet.y - self.y)*(objet.y - self.y))
def angleWithObj(self,objet):
a = np.rad2deg(math.atan2( objet.x - self.x, -objet.y + self.y)) - self.angle
if (a > 180):
a = a - 360
if (a < -180):
a = a + 360
return a
    def place_block(self):
        # place a block in front of the agent
        pass  # TODO
class Tir(Objet):
    # class for the projectiles
    def __init__(self, agent):
        self.dy = int(round(-PROJECTILE_VITESSE*np.cos(np.deg2rad(agent.angle))))  # next displacement
self.dx = int(round(PROJECTILE_VITESSE*np.sin(np.deg2rad(agent.angle))))
Objet.__init__(self,agent.x,agent.y,self.dx,self.dy,agent.angle,agent.team,TYPE_PROJECTILE)
self.x = agent.x + agent.width/2 - self.width/2
self.y = agent.y + agent.height/2 - self.height/2
self.dmg = PROJECTILE_DAMAGE
self.agent = agent
    def move(self):
        self.x = self.x + self.dx
        self.y = self.y + self.dy
        # if the projectile leaves the screen it disappears (now handled in Game.update)
        #if(self.x + self.width >= GAME_AREA_WIDTH or self.y + self.height >= GAME_AREA_HEIGHT or self.x < 0 or self.y < 0):
        #    game.objectsList.remove(self)
        #    game.list_projectile.remove(self)
class Block(Objet):
    # class for wall blocks
    def __init__(self, x=0, y=0):
        Objet.__init__(self, x, y, 0, 0, 0, 0, TYPE_BLOCK)
class Resource(Objet):
    # class for the resources
    def __init__(self, x=0, y=0):
        Objet.__init__(self, x, y, 0, 0, 0, 0, TYPE_RESOURCE)
        self.reward = RESOURCE_REWARD
def q(state, action = None, team = 0):
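    # Per-team tabular Q-function: each team has its own dict keyed by
    # state.stateID, and unseen states are lazily initialized to zeros.
    # With no action given, the full Q-row is returned (a mutable list,
    # so callers can update it in place).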
global q_table_E1, q_table_E2
    if team == 1:  # BLUE TEAM
        q_table = q_table_E1
    elif team == 2:  # RED TEAM
        q_table = q_table_E2
    else:  # error
        print("ERROR in q: team must be 1 or 2")
        return
if state.stateID not in q_table.keys():
q_table[state.stateID] = [0 for i in ACTIONS]
if action is None:
return q_table[state.stateID]
return q_table[state.stateID][action]
def markov_process(state, agent, isTraining):
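    # ε-greedy policy: while training, take a uniformly random action with
    # probability eps; otherwise act greedily, breaking ties between
    # equal-valued actions uniformly at random.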
if (isTraining and rd.uniform(0,1) < eps):
return rd.choice(ACTIONS)
else:
qt = q(state, team=agent.team)
actions = np.argwhere(qt == np.amax(qt))
a = rd.choice(actions)
a = a[0]
return a
def calcul_reward(current_state, next_state):
    reward = 0
    agent = next_state.agent
    # BLUE AGENT
    # Rewarded for eating resources
    # No reward for killing the opponent
    # Penalized for getting hurt
    if agent.team == 1:
        # Ate a resource
        if(next_state.agent.aMange):
            reward = reward + 150
            #print(agentName + " ate a resource")
        # Fired a shot
        if(agent.aTire):
            reward = reward - 10
"""
#Distance Ressource
d = (current_state.D_resource_min - next_state.D_resource_min - 2)
reward = reward - d
"""
"""
#Angle Ressource
if(abs(next_state.A_resource_min) < abs(current_state.A_resource_min) and not agent.estMort):
reward = reward + 2
elif(abs(next_state.A_resource_min) > abs(current_state.A_resource_min) and not agent.estMort):
reward = reward - 2
else:
reward = reward - 0.02
#Distance Ennemi
if (next_state.D_ennemy_min < current_state.D_ennemy_min and not agent.estMort):
reward = reward + 5
elif (next_state.D_ennemy_min > current_state.D_ennemy_min and not agent.estMort):
reward = reward - 5
else:
reward = reward - 0.05
#Angle Ennemi
if (next_state.A_ennemy_min > current_state.A_ennemy_min and not agent.estMort):
reward = reward + 1
elif (next_state.A_ennemy_min < current_state.A_ennemy_min and not agent.estMort):
reward = reward - 1
else:
reward = reward - 0.01
"""
"""
#Distance Ressource
if (next_state.S_D_resource_min < current_state.S_D_resource_min and not agent.estMort):
reward = reward + 100
elif (next_state.S_D_resource_min > current_state.S_D_resource_min and not agent.estMort):
reward = reward - 100
else:
reward = reward - 1
#Angle Ressource
if(abs(next_state.S_A_resource_min) < abs(current_state.S_A_resource_min) and not agent.estMort):
reward = reward + 20
elif(abs(next_state.S_A_resource_min) > abs(current_state.S_A_resource_min) and not agent.estMort):
reward = reward - 20
else:
reward = reward - 0.2
#Distance Ennemi
if (next_state.S_D_ennemy_min < current_state.S_D_ennemy_min and not agent.estMort):
reward = reward + 50
elif (next_state.S_D_ennemy_min > current_state.S_D_ennemy_min and not agent.estMort):
reward = reward - 50
else:
reward = reward - 0.5
#Angle Ennemi
if (next_state.S_A_ennemy_min > current_state.S_A_ennemy_min and not agent.estMort):
reward = reward + 10
elif (next_state.S_A_ennemy_min < current_state.S_A_ennemy_min and not agent.estMort):
reward = reward - 10
else:
reward = reward - 0.1
"""
    # RED AGENT
    # No reward for eating resources
    # Rewarded for killing the enemy
    # Penalized for getting hurt
    elif agent.team == 2:
        # Got hit
        if(next_state.agent.estTouche):
            reward = reward - 200
            #print(agentName + " got hit")
        # Died (hit three times)
        if(next_state.agent.estMort):
            reward = reward - 800
            #print(agentName + " died")
        # Hit a target
        if(next_state.agent.cibleTouchee):
            reward = reward + 500
            #print(agentName + " hit a target")
"""
#Angle Ennemi
if(abs(next_state.S_A_ennemy_min) < abs(current_state.S_A_ennemy_min)):
reward = reward + 50
elif(abs(next_state.S_A_ennemy_min) > abs(current_state.S_A_ennemy_min)):
reward = reward - 50
else:
reward = reward - 0.5
"""
"""
if not game.isTraining:
if agent.team == 1:
blueScore = blueScore + reward
else:
redScore = redScore + reward
"""
#Réinitialiser les booléennes
agent.estTouche = False
agent.estMort = False
agent.cibleTouchee = False
agent.aManger = False
agent.aTire = False
return reward
# training of all the agents
def qtrain():
global q_table_E1, q_table_E2, total_reward_bleu, total_reward_rouge
list_total_reward = [0 for agent in game.list_agent]
list_action = [None for agent in game.list_agent]
list_state = [None for agent in game.list_agent]
total_reward_bleu = [0 for i in range(TRAINING_N_EPISODE)]
total_reward_rouge = [0 for i in range(TRAINING_N_EPISODE)]
total_pommes_bleu = [0 for i in range(TRAINING_N_EPISODE)]
total_pommes_rouge = [0 for i in range(TRAINING_N_EPISODE)]
total_victimes_bleu = [0 for i in range(TRAINING_N_EPISODE)]
total_victimes_rouge = [0 for i in range(TRAINING_N_EPISODE)]
    # For each episode
    for i in range(TRAINING_N_EPISODE):
        # vary the number of agents each episode
        #game.window_nb_agents_E1 = rd.randint(1, 5)
        #game.window_nb_agents_E2 = rd.randint(0, 5)
        game.reset()
        p = np.floor(1000*i/TRAINING_N_EPISODE)/10  # progress percentage, one decimal
        # clear the per-agent lists
        list_total_reward = [0 for agent in game.list_agent]
        list_action = [None for agent in game.list_agent]
        list_state = [None for agent in game.list_agent]
        osSystem("clear")  # clear the console
        # status message printed each episode
        runTime = np.floor(time.time() - startTime)
timeLeft = np.floor((TRAINING_N_EPISODE-i)*(runTime/(i or 1)))
runTime_h = int(runTime/3600)
runTime_m = int(runTime/60)%60
runTime_s = int(runTime%60)
timeLeft_h = int(timeLeft/3600)
timeLeft_m = int(timeLeft/60)%60
timeLeft_s = int(timeLeft%60)
        print("Training in progress...")
        print("Episode: " + str(i) + "/" + str(TRAINING_N_EPISODE))
        print("(" + str(p) + "%)")
        print("Time elapsed: %02d:%02d:%02d" % (runTime_h, runTime_m, runTime_s))
        print("Estimated time left: %02d:%02d:%02d" % (timeLeft_h, timeLeft_m, timeLeft_s))
print("\nQ_table sizes:")
print("BLUE: " + str(len(q_table_E1.keys())) + " RED: " + str(len(q_table_E2.keys())))
        # initialize each agent's STATE
        for k in range(len(game.list_agent)):
            list_state[k] = State(game.list_agent[k], game.objectsList)  # store each agent's state
        # Start of the episode
        for j in range(TRAINING_N_STEP):
list_action = takeAllActions(list_state)
game.update()
updateQTable(list_state, list_action, list_total_reward)
        # accumulate the total rewards
        for k in range(len(game.list_agent)):
            if game.list_agent[k].team == 1:  # BLUE
                total_reward_bleu[i] = total_reward_bleu[i] + list_total_reward[k]
                total_pommes_bleu[i] = total_pommes_bleu[i] + game.list_agent[k].pommesMangees
                total_victimes_bleu[i] = total_victimes_bleu[i] + game.list_agent[k].victimes
            elif game.list_agent[k].team == 2:  # RED
                total_reward_rouge[i] = total_reward_rouge[i] + list_total_reward[k]
                total_pommes_rouge[i] = total_pommes_rouge[i] + game.list_agent[k].pommesMangees
                total_victimes_rouge[i] = total_victimes_rouge[i] + game.list_agent[k].victimes
#print("R_bleu = " + str(total_reward_bleu))
#print("R_rouge = " + str(total_reward_rouge))
print("P_bleu = " + str(total_pommes_bleu))
print("P_rouge = " + str(total_pommes_rouge))
print("V_bleu = " + str(total_victimes_bleu))
print("V_rouge = " + str(total_victimes_rouge))
def takeAllActions(list_state):
    # for each agent's state, choose an associated action using the Q-table
    list_action = [None for agent in game.list_agent]
    for m in range(len(list_state)):
        list_action[m] = markov_process(list_state[m], game.list_agent[m], True)
        game.list_agent[m].current_action = list_action[m]  # update the agent's stored action
        game.list_agent[m].execute_action(list_state[m])
    return list_action
def updateQTable(list_state, list_action, list_total_reward):
    current_learning_rate = TRAINING_LEARNING_RATE  # 0.001  # to be tuned
    for m in range(len(game.list_agent)):
        # get the agent's team
        team = list_state[m].agent.team
        # COMPUTE the new state
        next_state = State(list_state[m].agent, game.objectsList)
        # COMPUTE the reward R for each agent
        reward = calcul_reward(list_state[m], next_state)
        #print("debug updateQtable: " + str(m) + " reward: " + str(reward))
        list_total_reward[m] = list_total_reward[m] + reward
        # CREATE THE NEW STATE FOR EACH AGENT
        #q(list_state[m])[list_action[m]] = (1-current_learning_rate)*q(list_state[m], list_action[m]) + current_learning_rate * (reward + TRAINING_GAMMA * max(q(next_state)) - q(list_state[m], list_action[m]))
        #q(list_state[m], team=team)[list_action[m]] = (1-current_learning_rate)*q(list_state[m], list_action[m], team=team) + current_learning_rate * (reward + TRAINING_GAMMA * max(q(next_state, team=team)) - q(list_state[m], list_action[m], team=team))
        # the two variants above subtract Q(s,a) a second time inside the
        # learning-rate term (a "1 - 2*alpha" problem), so they were dropped
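        # Standard tabular Q-learning update implemented below:
        #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))
        # q(...) returns the Q-row list, so assigning into it updates the table in place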
q(list_state[m], team=team)[list_action[m]] = (1-current_learning_rate)*q(list_state[m] , list_action[m], team=team) + current_learning_rate * (reward + TRAINING_GAMMA * max(q(next_state,team=team)))
#q(list_state[m])[list_action[m]]
list_state[m] = next_state
#print("Agent: "+ str(m) +" Total reward = "+ str(list_total_reward[m]))
def save_q_tables():
global q_table_E1, q_table_E2
np.save('q_table_E1.npy', q_table_E1)
np.save('q_table_E2.npy', q_table_E2)
print("saved Q-tables")
def load_q_tables():
    global q_table_E1, q_table_E2
    # the tables are dicts saved as object arrays; numpy needs allow_pickle=True to load them
    q_table_E1 = np.load('q_table_E1.npy', allow_pickle=True).item()
    q_table_E2 = np.load('q_table_E2.npy', allow_pickle=True).item()
    print("loaded Q-tables")
class Game():
    # class that manages the game
    objectsList = []
    list_agent = []
    list_resource = []
    list_projectile = []
    isPlay = False  # flag: False = paused; True = playing
    isTraining = False
    gameCounter = 1  # clock for the time modulo
    # initializations:
    # "current": the value used by the game
    # "window": the value the user sets in the window
    current_nb_agents_E1 = NB_AGENTS_BLEUS  # team 1
    window_nb_agents_E1 = NB_AGENTS_BLEUS
    current_nb_agents_E2 = NB_AGENTS_ROUGES  # team 2
    window_nb_agents_E2 = NB_AGENTS_ROUGES
    current_resource_spawn_rate = 0.02
    window_resource_spawn_rate = 0.02
    current_learning_rate = TRAINING_LEARNING_RATE
    window_learning_rate = TRAINING_LEARNING_RATE
    current_random_path_prob = 0  # Boltzmann exploration probability
    window_random_path_prob = 0  # Boltzmann exploration probability
    current_time_period = 100  # time between frames
    window_time_period = 100  # time between frames
    current_time_modulo = 1  # to render every X frames
    window_time_modulo = 1  # to render every X frames
    current_nombre_depisodes = 0
    window_nombre_depisodes = 0
    def __init__(self):
        self.creer_agents()
        for i in range(NB_MAX_RESOURCES):
            self.creer_resource()
    def creer_resource(self):
        x = rd.randint(100, GAME_AREA_WIDTH - RESOURCE_WIDTH - 100)
        y = rd.randint(0, GAME_AREA_HEIGHT - RESOURCE_HEIGHT)
resource = Resource(x,y)
self.objectsList.append(resource)
self.list_resource.append(resource)
def creer_agents(self):
for i in range(self.current_nb_agents_E1):
a = Agent(0,0,0,1)
a.spawn()
self.objectsList.append(a)
self.list_agent.append(a)
for i in range(self.current_nb_agents_E2):
a = Agent(0,0,0,2)
a.spawn()
self.objectsList.append(a)
self.list_agent.append(a)
    def playPause(self):
        #print("DEBUG play pause")
        self.isPlay = not self.isPlay  # toggle the flag
    def reset(self):
        #global blueScore, redScore
        self.gameCounter = 0
        #blueScore = 0
        #redScore = 0
        #print("DEBUG reset")  # DEBUG
        self.isPlay = False  # stop the game
        self.objectsList = []  # clear all objects
        self.list_agent = []
        self.list_resource = []
        self.list_projectile = []
        # refresh the parameter values
self.current_nb_agents_E1 = self.window_nb_agents_E1
self.current_nb_agents_E2 = self.window_nb_agents_E2
self.current_resource_spawn_rate = self.window_resource_spawn_rate
self.current_learning_rate = self.window_learning_rate
self.current_random_path_prob = self.window_random_path_prob
self.current_time_period = self.window_time_period
self.current_time_modulo = self.window_time_modulo
self.current_nombre_depisodes = self.window_nombre_depisodes
        # debug
        if False:
            print("nb_agents_E1: " + str(self.current_nb_agents_E1))
            print("nb_agents_E2: " + str(self.current_nb_agents_E2))
            print("resource_spawn_rate: " + str(self.current_resource_spawn_rate))
            print("learning_rate: " + str(self.current_learning_rate))
            print("random_path_prob: " + str(self.current_random_path_prob))
            print("time_period: " + str(self.current_time_period))
            print("time_modulo: " + str(self.current_time_modulo))
            print("nombre_depisodes: " + str(self.current_nombre_depisodes))
self.creer_agents()
for i in range(NB_MAX_RESOURCES):
self.creer_resource()
if not self.isTraining:
ui.gameWidget.update()
def update_parametres(self):
self.window_nb_agents_E1 = ui.spinBoxE1.value()
self.window_nb_agents_E2 = ui.spinBoxE2.value()
#self.window_resource_spawn_rate = ui.SpinBoxRSR.value()
#self.window_learning_rate = ui.SpinBoxLearningRate.value()
#self.window_random_path_prob = ui.SpinBoxRPP.value()
self.window_time_period = ui.SpinBoxTimePeriod.value()
self.window_time_modulo = ui.spinBoxModulo.value()
self.current_nb_agents_E1 = self.window_nb_agents_E1
self.current_nb_agents_E2 = self.window_nb_agents_E2
self.current_resource_spawn_rate = self.window_resource_spawn_rate
self.current_learning_rate = self.window_learning_rate
self.current_random_path_prob = self.window_random_path_prob
self.current_time_period = self.window_time_period
self.current_time_modulo = self.window_time_modulo
    def update(self):
        if not self.isTraining:
            for agent in self.list_agent:
                state = State(agent, self.objectsList)
                action = markov_process(state, agent, False)
                agent.current_action = action
                agent.execute_action(state)
        # move the agents and projectiles
        for objet in self.objectsList:
            objet.move()
        # agents reload
        for agent in self.list_agent:
            agent.reload = agent.reload - 1
            if agent.reload < 0:
                agent.reload = 0
        # agent-resource collisions (iterate over a copy: the lists are mutated)
        for agent in self.list_agent:
            for resource in list(self.list_resource):
                if agent.collision(resource):
                    self.list_resource.remove(resource)
                    self.objectsList.remove(resource)
                    #TODO: give the reward to the agent
                    agent.aMange = True
                    agent.pommesMangees = agent.pommesMangees + 1
        # projectile-agent collisions
        for agent in self.list_agent:
            for p in list(self.list_projectile):
                if agent.collision(p) and agent.team != p.team:
                    p.agent.cibleTouchee = True
                    agent.takeDamage(p)
                    self.objectsList.remove(p)
                    self.list_projectile.remove(p)
        # projectiles leaving the play area
        for p in list(self.list_projectile):
            if (p.x + p.width >= GAME_AREA_WIDTH or p.y + p.height >= GAME_AREA_HEIGHT
                    or p.x < 0 or p.y < 0):
                self.objectsList.remove(p)
                self.list_projectile.remove(p)
        # spawn resources back up to the cap
        while len(self.list_resource) < NB_MAX_RESOURCES:
            self.creer_resource()
# main
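# Command-line flags parsed below: "-l" loads previously saved Q-tables,
# "-t" runs headless training, saves the Q-tables, then exits; with no
# flags, the PyQt window starts and the game loop is driven by a QTimer.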
if __name__ == '__main__':
    def timeout():
        if game.isPlay:
            # game "loop"
            game.update()
            if (game.gameCounter % game.current_time_modulo) == 0:
                ui.gameWidget.update()
            game.gameCounter += 1
            osSystem("clear")
            print("STEP: " + str(game.gameCounter))
            #print("BLUE SCORE: " + str(blueScore))
            #print("RED SCORE: " + str(redScore))
            #timer.stop()
            timePeriod = game.current_time_period
            #timer.start(timePeriod)
app = QApplication(sys.argv)
game = Game()
if "-l" in sys.argv:
load_q_tables()
if "-t" in sys.argv:
game.isTraining = True
startTime = time.time()
qtrain()
save_q_tables()
        #plt.plot(np.linspace(1, TRAINING_N_EPISODE, TRAINING_N_EPISODE, dtype=int), total_reward_bleu)
        #plt.plot(np.linspace(1, TRAINING_N_EPISODE, TRAINING_N_EPISODE, dtype=int), total_reward_rouge)
        #plt.show()
sys.exit()
    else:
        fenetre = Fenetre(game)
        ui = Ui_MainWindow()  # class generated by QtDesigner
        ui.setupUi(fenetre)
        game.update_parametres()
        osSystem("clear")
timer = QTimer()
timer.timeout.connect(timeout)
timePeriod = ui.SpinBoxTimePeriod.value()
timer.start(timePeriod)
sys.exit(app.exec_())