-
Notifications
You must be signed in to change notification settings - Fork 58
/
asterix.py
156 lines (138 loc) · 6.08 KB
/
asterix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
################################################################################################################
# Authors: #
# Kenny Young (kjyoung@ualberta.ca) #
# Tian Tian (ttian@ualberta.ca) #
################################################################################################################
import numpy as np
#####################################################################################################################
# Constants
#
#####################################################################################################################
ramp_interval = 100     # steps counted down between difficulty increases
init_spawn_speed = 10   # initial number of steps between entity spawns
init_move_interval = 5  # initial number of steps between entity moves
shot_cool_down = 5      # not referenced by this environment; kept for compatibility
#####################################################################################################################
# Env
#
# The player can move freely along the 4 cardinal directions. Enemies and treasure spawn from the sides. A reward of
# +1 is given for picking up treasure. Termination occurs if the player makes contact with an enemy. Enemy and
# treasure direction are indicated by a trail channel. Difficulty is periodically increased by increasing the speed
# and spawn rate of enemies and treasure.
#
#####################################################################################################################
class Env:
    """Asterix-style grid environment on a 10x10 board.

    Entities are stored in ``self.entities``, one optional entry per slot.
    Slot ``i`` owns board row ``i + 1`` and each entry is the list
    ``[x, y, moving_right, is_gold]``.
    """

    def __init__(self, ramping=True, random_state=None):
        """Create the environment and reset it to the start state.

        Args:
            ramping: If true, spawn and move intervals shrink over time.
            random_state: Optional ``np.random.RandomState`` used for all
                random draws. Pass a seeded generator for reproducible
                episodes; when ``None`` a fresh unseeded one is created.
        """
        self.channels = {
            'player': 0,
            'enemy': 1,
            'trail': 2,
            'gold': 3,
        }
        self.action_map = ['n', 'l', 'u', 'r', 'd', 'f']
        self.ramping = ramping
        if random_state is None:
            random_state = np.random.RandomState()
        self.random = random_state
        self.reset()

    def act(self, a):
        """Advance the environment one step using action index ``a``.

        Args:
            a: Index into ``self.action_map``.

        Returns:
            ``(reward, terminal)``. Once the episode is terminal, further
            calls return ``(0, True)`` without changing any state.
        """
        r = 0
        if self.terminal:
            return r, self.terminal

        a = self.action_map[a]

        # Spawn enemy or treasure if timer is up
        if self.spawn_timer == 0:
            self._spawn_entity()
            self.spawn_timer = self.spawn_speed

        # Resolve player action ('n' and 'f' leave the player in place).
        # Rows 0 and 9 are never occupied by entities, so y is kept in 1..8.
        if a == 'l':
            self.player_x = max(0, self.player_x - 1)
        elif a == 'r':
            self.player_x = min(9, self.player_x + 1)
        elif a == 'u':
            self.player_y = max(1, self.player_y - 1)
        elif a == 'd':
            self.player_y = min(8, self.player_y + 1)

        # Contact check after the player has moved
        player_pos = [self.player_x, self.player_y]
        for slot, entity in enumerate(self.entities):
            if entity is not None and entity[0:2] == player_pos:
                r += self._resolve_contact(slot)

        # Move entities when the move timer expires, then check contact again
        if self.move_timer == 0:
            self.move_timer = self.move_speed
            for slot, entity in enumerate(self.entities):
                if entity is None:
                    continue
                entity[0] += 1 if entity[2] else -1
                if entity[0] < 0 or entity[0] > 9:
                    # Left the board: free the slot. An off-board entity
                    # cannot share a cell with the player.
                    self.entities[slot] = None
                    continue
                if entity[0:2] == player_pos:
                    r += self._resolve_contact(slot)

        # Update various timers
        self.spawn_timer -= 1
        self.move_timer -= 1

        # Ramp difficulty if interval has elapsed
        if self.ramping and (self.spawn_speed > 1 or self.move_speed > 1):
            if self.ramp_timer >= 0:
                self.ramp_timer -= 1
            else:
                # Move interval shrinks only on odd ramp indices; spawn
                # interval shrinks on every ramp. Neither goes below 1.
                if self.move_speed > 1 and self.ramp_index % 2:
                    self.move_speed -= 1
                if self.spawn_speed > 1:
                    self.spawn_speed -= 1
                self.ramp_index += 1
                self.ramp_timer = ramp_interval
        return r, self.terminal

    def _resolve_contact(self, slot):
        """Handle the player touching the entity in ``slot``; return reward.

        Gold is removed and yields 1. An enemy marks the episode terminal,
        stays in its slot, and yields 0.
        """
        if self.entities[slot][3]:
            self.entities[slot] = None
            return 1
        self.terminal = True
        return 0

    def _spawn_entity(self):
        """Spawn an enemy or treasure in a random free slot.

        The entity is gold with probability 1/3 and otherwise an enemy. It
        starts at x=0 moving right or x=9 moving left with equal
        probability. Does nothing if every slot is occupied.
        """
        # Both draws happen before the free-slot check, so the random stream
        # advances even when nothing is spawned.
        lr = self.random.rand() < 1 / 2
        is_gold = self.random.rand() < 1 / 3
        x = 0 if lr else 9
        slot_options = [i for i, e in enumerate(self.entities) if e is None]
        if not slot_options:
            return
        slot = slot_options[self.random.randint(len(slot_options))]
        self.entities[slot] = [x, slot + 1, lr, is_gold]

    def difficulty_ramp(self):
        """Return the number of difficulty ramps applied so far."""
        return self.ramp_index

    def state(self):
        """Return the board as a boolean array indexed ``[y, x, channel]``.

        Each entity also marks the cell directly behind it (relative to its
        direction of travel) in the trail channel when that cell is on the
        board.
        """
        state = np.zeros((10, 10, len(self.channels)), dtype=bool)
        state[self.player_y, self.player_x, self.channels['player']] = 1
        for entity in self.entities:
            if entity is None:
                continue
            c = self.channels['gold'] if entity[3] else self.channels['enemy']
            state[entity[1], entity[0], c] = 1
            back_x = entity[0] - 1 if entity[2] else entity[0] + 1
            if 0 <= back_x <= 9:
                state[entity[1], back_x, self.channels['trail']] = 1
        return state

    def reset(self):
        """Reset to the start state for a new episode."""
        self.player_x = 5
        self.player_y = 5
        self.entities = [None] * 8
        self.shot_timer = 0  # not read anywhere in this class
        self.spawn_speed = init_spawn_speed
        self.spawn_timer = self.spawn_speed
        self.move_speed = init_move_interval
        self.move_timer = self.move_speed
        self.ramp_timer = ramp_interval
        self.ramp_index = 0
        self.terminal = False

    def state_shape(self):
        """Return the dimensionality of the game state (10x10xn)."""
        return [10, 10, len(self.channels)]

    def minimal_action_set(self):
        """Return indices of actions with a unique effect here ('f' is excluded)."""
        minimal_actions = ['n', 'l', 'u', 'r', 'd']
        return [self.action_map.index(x) for x in minimal_actions]