-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore.py
executable file
·56 lines (44 loc) · 1.49 KB
/
explore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python3
import numpy as np
from gameboy import GBGym
def main():
    """Collect a greedy-play dataset from the Game Boy gym and save it.

    For every turn of every game, each of the available actions is tried
    with ``gym.step`` and immediately undone with ``gym.step_back``, and
    the reward it produced is recorded. One dataset row is then written
    for that turn, laid out as::

        [state features..., reward for action 0, ..., reward for action 43]

    after which the action with the highest recorded reward is actually
    played. A game stops early once every action reports termination.

    The full array is written to ``dataset.npy`` in the current working
    directory. Nothing is returned.
    """
    GAME_COUNT = 5
    GAME_LENGTH = 300
    ACTION_COUNT = 44
    # Row width = state features + ACTION_COUNT rewards, so the state
    # vector is presumably 20 values long -- TODO confirm against GBGym.
    ROW_WIDTH = 64
    # Sentinel stored in place of the reward for an action that ends the game.
    TERMINAL_REWARD = -69

    gym = GBGym(step_backwards=True)
    # NOTE(review): every game resets the gym again below, so this initial
    # reset looks redundant; kept in case GBGym relies on it -- confirm.
    gym.reset()

    dataset = np.zeros(shape=(GAME_COUNT * GAME_LENGTH, ROW_WIDTH))
    # Every entry is overwritten on each turn, so one buffer is enough;
    # np.concatenate below copies it into the stored row.
    rewards = np.zeros(ACTION_COUNT)

    for game in range(GAME_COUNT):
        state, _ = gym.reset()
        state = state[0]

        for turn in range(GAME_LENGTH):
            # Probe every action from the current state, undoing each one.
            for action in range(ACTION_COUNT):
                _, reward, terminated, _ = gym.step(action)
                rewards[action] = TERMINAL_REWARD if terminated else reward
                gym.step_back()

            if np.all(rewards == TERMINAL_REWARD):
                # No action keeps the game alive. The rows reserved for the
                # remaining turns of this game stay all-zero in the output.
                print('ending game...')
                break

            index = game * GAME_LENGTH + turn
            print('saving at array index = ', index)
            dataset[index] = np.concatenate((state, rewards), axis=0)

            # Commit the greedy choice and continue from the resulting state.
            best_action = np.argmax(rewards)
            state, _, _, _ = gym.step(best_action)
            state = state[0]

    with open('dataset.npy', 'wb') as file:
        np.save(file, dataset)
# Run the data collection only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()