-
Notifications
You must be signed in to change notification settings - Fork 58
/
breakout.py
131 lines (117 loc) · 4.98 KB
/
breakout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
################################################################################################################
# Authors: #
# Kenny Young (kjyoung@ualberta.ca) #
# Tian Tian (ttian@ualberta.ca) #
################################################################################################################
import numpy as np
#####################################################################################################################
# Env
#
# The player controls a paddle on the bottom of the screen and must bounce a ball to break 3 rows of bricks along the
# top of the screen. A reward of +1 is given for each brick broken by the ball. When all bricks are cleared another 3
# rows are added. The ball travels only along diagonals, when it hits the paddle it is bounced either to the left or
# right depending on the side of the paddle hit, when it hits a wall or brick it is reflected. Termination occurs when
# the ball hits the bottom of the screen. The ball's direction is indicated by a trail channel.
#
#####################################################################################################################
class Env:
    """MinAtar Breakout environment on a 10x10 grid.

    The player moves a paddle along the bottom row (y=9) and bounces a
    ball diagonally into rows of bricks near the top. Each broken brick
    yields +1 reward; when all bricks are cleared another 3 rows appear.
    The episode terminates when the ball reaches the bottom row without
    hitting the paddle.
    """
    def __init__(self, ramping=None):
        # `ramping` is unused here — difficulty does not ramp in this
        # game (see difficulty_ramp); presumably kept for interface
        # consistency with the other environments — TODO confirm.
        # Channel indices of the planes returned by state().
        self.channels ={
            'paddle':0,
            'ball':1,
            'trail':2,
            'brick':3,
        }
        # Full action alphabet; only 'l' and 'r' have any effect in
        # act() — see minimal_action_set().
        self.action_map = ['n','l','u','r','d','f']
        # Unseeded RNG; only used in reset() to pick the ball's start side.
        self.random = np.random.RandomState()
        self.reset()
    # Update environment according to agent action
    def act(self, a):
        """Advance one timestep given action index `a` into action_map.

        Returns (reward, terminal): reward is +1 when a brick is broken
        this step (at most once per contiguous run of brick strikes),
        terminal is True once the ball has passed the paddle.
        """
        r = 0
        # No-op once the episode has ended.
        if(self.terminal):
            return r, self.terminal
        a = self.action_map[a]
        # Resolve player action
        if(a=='l'):
            self.pos = max(0, self.pos-1)
        elif(a=='r'):
            self.pos = min(9,self.pos+1)
        # Update ball position
        # Remember the previous cell for the trail channel and so a
        # bounce can undo the vertical move (new_y = self.last_y below).
        self.last_x = self.ball_x
        self.last_y = self.ball_y
        # ball_dir encodes a diagonal:
        #   0 = up-left, 1 = up-right, 2 = down-right, 3 = down-left.
        if(self.ball_dir == 0):
            new_x = self.ball_x-1
            new_y = self.ball_y-1
        elif(self.ball_dir == 1):
            new_x = self.ball_x+1
            new_y = self.ball_y-1
        elif(self.ball_dir == 2):
            new_x = self.ball_x+1
            new_y = self.ball_y+1
        elif(self.ball_dir == 3):
            new_x = self.ball_x-1
            new_y = self.ball_y+1
        strike_toggle = False
        # Side walls: clamp x into [0,9] and flip the horizontal
        # component of the direction (0<->1, 2<->3).
        if(new_x<0 or new_x>9):
            if(new_x<0):
                new_x = 0
            if(new_x>9):
                new_x=9
            self.ball_dir=[1,0,3,2][self.ball_dir]
        # Ceiling: clamp y to 0 and flip the vertical component
        # (0<->3, 1<->2).
        if(new_y<0):
            new_y = 0
            self.ball_dir=[3,2,1,0][self.ball_dir]
        # Brick hit: remove the brick, reward, and reflect vertically.
        # self.strike suppresses extra rewards while consecutive steps
        # keep striking bricks; it is cleared below once a non-strike
        # step occurs.
        elif(self.brick_map[new_y,new_x]==1):
            strike_toggle = True
            if(not self.strike):
                r+=1
                self.strike = True
                self.brick_map[new_y,new_x]=0
                new_y = self.last_y
                self.ball_dir=[3,2,1,0][self.ball_dir]
        # Bottom row: bounce off the paddle or terminate.
        elif(new_y == 9):
            # Board cleared -> respawn another 3 rows of bricks.
            if(np.count_nonzero(self.brick_map)==0):
                self.brick_map[1:4,:] = 1
            # Ball was directly above the paddle (pre-move x):
            # straight vertical bounce.
            if(self.ball_x == self.pos):
                self.ball_dir=[3,2,1,0][self.ball_dir]
                new_y = self.last_y
            # Ball lands on the paddle diagonally (post-move x):
            # reverse both components (0<->2, 1<->3).
            elif(new_x == self.pos):
                self.ball_dir=[2,3,0,1][self.ball_dir]
                new_y = self.last_y
            else:
                # Missed the paddle: episode over.
                self.terminal = True
        # A step that did not hit a brick re-arms the strike reward.
        if(not strike_toggle):
            self.strike = False
        self.ball_x = new_x
        self.ball_y = new_y
        return r, self.terminal
    # Query the current level of the difficulty ramp, difficulty does not ramp in this game, so return None
    def difficulty_ramp(self):
        return None
    # Process the game-state into the 10x10xn state provided to the agent and return
    def state(self):
        """Return the observation: a (10, 10, n_channels) bool array."""
        state = np.zeros((10,10,len(self.channels)),dtype=bool)
        state[self.ball_y,self.ball_x,self.channels['ball']] = 1
        state[9,self.pos, self.channels['paddle']] = 1
        # Trail marks the ball's previous cell, indicating its direction.
        state[self.last_y,self.last_x,self.channels['trail']] = 1
        # brick_map is float 0/1; assignment casts it into the bool plane.
        state[:,:,self.channels['brick']] = self.brick_map
        return state
    # Reset to start state for new episode
    def reset(self):
        """Reset to the start state for a new episode."""
        self.ball_y = 3
        # Ball starts at y=3 on a random side: left edge moving
        # down-right (dir 2) or right edge moving down-left (dir 3).
        ball_start = self.random.randint(2)
        self.ball_x, self.ball_dir = [(0,2),(9,3)][ball_start]
        # Paddle starts at the center of the bottom row.
        self.pos = 4
        # 3 rows of bricks occupy rows 1-3.
        self.brick_map = np.zeros((10,10))
        self.brick_map[1:4,:] = 1
        self.strike = False
        self.last_x = self.ball_x
        self.last_y = self.ball_y
        self.terminal = False
    # Dimensionality of the game-state (10x10xn)
    def state_shape(self):
        return [10,10,len(self.channels)]
    # Subset of actions that actually have a unique impact in this environment
    def minimal_action_set(self):
        """Return indices of the actions with a distinct effect: n, l, r."""
        minimal_actions = ['n','l','r']
        return [self.action_map.index(x) for x in minimal_actions]