-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathAgent.py
104 lines (91 loc) · 3 KB
/
Agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
class Agent:
    """Epsilon-greedy selector that mixes model predictions with heuristics.

    On every call to :meth:`sample` a ``(move, act)`` pair is produced. Each
    of the two choices is made independently: with probability ``e_greed`` a
    hand-written rule (:meth:`better_move` / :meth:`better_action`) is used,
    otherwise the arg-max of the corresponding model output is taken.

    Attributes:
        act_dim: Number of discrete actions; upper bound (exclusive) for the
            random action drawn in :meth:`better_action`.
        algorithm: Object exposing ``model.predict(station)`` which returns
            a ``(pred_move, pred_act)`` pair whose items provide ``.numpy()``.
        e_greed: Current probability of using the heuristic instead of the
            model prediction.
        e_greed_decrement: Amount subtracted from ``e_greed`` at each decay
            step (two decay steps happen per :meth:`sample` call).
    """

    # Lower bound that ``e_greed`` is clamped to at every decay step.
    MIN_E_GREED = 0.03
    # While ``soul`` is below SOUL_THRESHOLD, the actions listed in
    # SOUL_GATED_ACTIONS are never returned by ``sample``'s greedy branch or
    # by the random branch of ``better_action``.
    SOUL_THRESHOLD = 33
    SOUL_GATED_ACTIONS = (4, 5)

    def __init__(self, act_dim, algorithm, e_greed=0.1, e_greed_decrement=0):
        """Store the action-space size, the algorithm and the epsilon schedule."""
        self.act_dim = act_dim
        self.algorithm = algorithm
        self.e_greed = e_greed
        self.e_greed_decrement = e_greed_decrement

    def _decay_e_greed(self):
        """Lower ``e_greed`` by one decrement, never going below the floor."""
        self.e_greed = max(
            self.MIN_E_GREED, self.e_greed - self.e_greed_decrement)

    def sample(self, station, soul, hornet_x, hornet_y, player_x, hornet_skill1):
        """Pick a ``(move, act)`` pair for the current step.

        Args:
            station: Input forwarded unchanged to
                ``self.algorithm.model.predict``.
            soul: Current soul amount; below ``SOUL_THRESHOLD`` the actions
                in ``SOUL_GATED_ACTIONS`` are excluded from the greedy choice.
            hornet_x: Horizontal position of the opponent.
            hornet_y: Vertical position of the opponent.
            player_x: Horizontal position of the player.
            hornet_skill1: Truthy while the opponent's "skill1" flag is set.

        Returns:
            Tuple ``(move, act)`` of action indices.

        Note:
            ``e_greed`` is decayed twice per call: once after the move is
            chosen and once after the action is chosen, so the second draw
            already sees the once-decayed value.
        """
        pred_move, pred_act = self.algorithm.model.predict(station)
        pred_move = pred_move.numpy()
        pred_act = pred_act.numpy()

        if np.random.rand() < self.e_greed:
            move = self.better_move(hornet_x, player_x, hornet_skill1)
        else:
            move = np.argmax(pred_move)
        self._decay_e_greed()

        if np.random.rand() < self.e_greed:
            act = self.better_action(
                soul, hornet_x, hornet_y, player_x, hornet_skill1)
        else:
            # Work on a private float copy: the array returned by ``.numpy()``
            # may share memory with the tensor, and a float dtype guarantees
            # that -inf can be stored.
            scores = np.array(pred_act, dtype=float)
            if soul < self.SOUL_THRESHOLD:
                # -inf (rather than a finite sentinel such as -30) ensures a
                # gated action can never win, however low the other scores.
                scores[0, list(self.SOUL_GATED_ACTIONS)] = -np.inf
            act = np.argmax(scores)
        self._decay_e_greed()

        return move, act

    def better_move(self, hornet_x, player_x, hornet_skill1):
        """Return a rule-based move index in ``{0, 1, 2, 3}``.

        The choice depends on the horizontal distance between player and
        opponent, on which side the player is (``player_x - hornet_x > 0`` or
        not), and on whether ``hornet_skill1`` is set.

        Args:
            hornet_x: Horizontal position of the opponent.
            player_x: Horizontal position of the player.
            hornet_skill1: Truthy while the opponent's "skill1" flag is set.

        Returns:
            The move index selected by the distance/side table below.
        """
        dire = player_x - hornet_x
        dis = abs(dire)
        positive_side = dire > 0

        if hornet_skill1:
            if dis < 6:
                # run away while distance < 6
                return 1 if positive_side else 0
            # do not do long move while distance > 6
            return 2 if positive_side else 3

        if dis < 2.5:
            return 1 if positive_side else 0
        if dis < 5:
            return 2 if positive_side else 3
        return 0 if positive_side else 1

    def better_action(self, soul, hornet_x, hornet_y, player_x, hornet_skill1):
        """Return a rule-based (partly random) action index.

        Args:
            soul: Current soul amount; action 4 is only returned by the
                dedicated rule when ``soul >= SOUL_THRESHOLD``, and the
                random branch avoids ``SOUL_GATED_ACTIONS`` below it.
            hornet_x: Horizontal position of the opponent.
            hornet_y: Vertical position of the opponent.
            player_x: Horizontal position of the player.
            hornet_skill1: Truthy while the opponent's "skill1" flag is set.

        Returns:
            An action index. Deterministic except for two branches: the
            uniform draw over ``range(self.act_dim)`` when ``1.5 <= dis < 5``
            and ``hornet_y <= 32``, and the draw from ``{2, 3}`` when
            ``5 <= dis < 12``.
        """
        dis = abs(player_x - hornet_x)

        if hornet_skill1:
            return 6 if dis < 3 else 1

        if hornet_y > 34 and dis < 5 and soul >= self.SOUL_THRESHOLD:
            return 4

        if dis < 1.5:
            return 6
        if dis < 5:
            if hornet_y > 32:
                return 6
            act = np.random.randint(self.act_dim)
            if soul < self.SOUL_THRESHOLD:
                # Redraw until the action is not one of the soul-gated ones.
                while act in self.SOUL_GATED_ACTIONS:
                    act = np.random.randint(self.act_dim)
            return act
        if dis < 12:
            return 2 + np.random.randint(2)
        return 6