#!/usr/bin/env python3
# Import modules
import gym
from copy import deepcopy
from env.PendulumEnv import PendulumEnv
from env.Acrobot import AcrobotEnv
from env.Gridworld import GridworldEnv
import env.MinAtar as MinAtar
import numpy as np
class Environment:
"""
    Environment is a wrapper around concrete environment implementations
    which tracks step and episode counts and truncates overly long episodes.
"""
def __init__(self, config, seed, monitor=False, monitor_after=0):
"""
Constructor
Parameters
----------
config : dict
            The environment configuration dictionary
seed : int
The seed to use for all random number generators
monitor : bool
Whether or not to render the scenes as the agent learns, by
default False
monitor_after : int
If monitor is True, how many timesteps should pass before
the scene is rendered, by default 0.
"""
self.steps = 0
self.episodes = 0
# Whether to render the environment, and when to. Useful for debugging.
self.monitor = monitor
self.steps_until_monitor = monitor_after
# Set up the wrapped environment
self.env_name = config["env_name"]
self.env = _env_factory(config)
self.env.seed(seed=seed)
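        # Episode step limit; a non-positive value disables truncation (see step())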
self.steps_per_episode = config["steps_per_episode"]
# Log environment info
        if hasattr(self.env, "info"):
self.info = self.env.info
else:
self.info = {}
@property
def action_space(self):
"""
Gets the action space of the Gym environment
Returns
-------
gym.spaces.Space
The action space
"""
return self.env.action_space
@property
def observation_space(self):
"""
Gets the observation space of the Gym environment
Returns
-------
gym.spaces.Space
The observation space
"""
return self.env.observation_space
def seed(self, seed):
"""
Seeds the environment with a random seed
Parameters
----------
seed : int
The random seed to seed the environment with
"""
self.env.seed(seed)
def reset(self):
"""
Resets the environment by resetting the step counter to 0 and resetting
the wrapped environment. This function also increments the total
episode count.
Returns
-------
2-tuple of array_like, dict
The new starting state and an info dictionary
"""
self.steps = 0
self.episodes += 1
state = self.env.reset()
return state, {"orig_state": state}
def render(self):
"""
Renders the current frame
"""
self.env.render()
def step(self, action):
"""
Takes a single environmental step
Parameters
----------
action : array_like of float
The action array. The number of elements in this array should be
the same as the action dimension.
Returns
-------
        array_like of float, float, bool, dict
            The next state and reward, as well as a flag specifying whether
            the current episode has finished, and an info dictionary
"""
        if self.monitor and self.steps_until_monitor < 0:
            self.render()
        elif self.monitor:
            # Count down the remaining steps before rendering begins
            self.steps_until_monitor -= 1
self.steps += 1
# Get the next state, reward, and done flag
state, reward, done, info = self.env.step(action)
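        # Keep the unmodified state available to callers through the info dict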
info["orig_state"] = state
        # If the episode terminated on its own, the step limit was not exceeded
if done:
info["steps_exceeded"] = False
return state, reward, done, info
        # If the step limit was reached, truncate the episode; a non-positive
        # steps_per_episode disables this check
if self.steps >= self.steps_per_episode > 0:
done = True
info["steps_exceeded"] = True
return state, reward, done, info
def _env_factory(config):
"""
Instantiates and returns an environment given an environment configuration
file.
Parameters
----------
    config : dict
        The environment config; it must contain at least "env_name" and
        "seed", plus any environment-specific keys such as "continuous",
        "rows"/"cols", or "use_minimal_action_set"
Returns
-------
gym.Env
The environment to train on
"""
name = config["env_name"]
seed = config["seed"]
env = None
if name == "Pendulum-v0":
env = PendulumEnv(seed=seed, continuous_action=config["continuous"])
elif name == "Gridworld":
env = GridworldEnv(config["rows"], config["cols"])
env.seed(seed)
elif name == "Acrobot-v1":
env = AcrobotEnv(seed=seed, continuous_action=config["continuous"])
# If using MinAtar environments, we need a wrapper to permute the batch
# dimensions to be consistent with PyTorch.
elif "minatar" in name.lower():
        if "/" in name:
            raise ValueError(
                f"specify the environment as MinAtar{name.split('/')[-1]} "
                f"rather than {name}"
            )
minimal_actions = config.get("use_minimal_action_set", True)
stripped_name = name[7:].lower() # Strip off "MinAtar"
env = MinAtar.BatchFirst(
MinAtar.GymEnv(
stripped_name,
use_minimal_action_set=minimal_actions,
)
)
# Otherwise use a gym environment
else:
env = gym.make(name).env
env.seed(seed)
return env
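# A minimal usage sketch, not part of the module above. It assumes the
# "Pendulum-v0" branch of _env_factory, that the config keys shown are the
# only ones PendulumEnv needs, and that the wrapped environment exposes a
# sample()-able action_space; adjust the config for your own environment.
if __name__ == "__main__":
    example_config = {
        "env_name": "Pendulum-v0",
        "seed": 0,
        "continuous": True,
        "steps_per_episode": 200,
    }
    environment = Environment(example_config, seed=0)
    state, info = environment.reset()
    done = False
    episode_return = 0.0
    while not done:
        # Sample a random action from the wrapped environment's action space
        action = environment.action_space.sample()
        state, reward, done, info = environment.step(action)
        episode_return += reward
    print(f"Episode finished after {environment.steps} steps, "
          f"return {episode_return:.2f}")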