forked from ikostrikov/pytorch-a3c
-
Notifications
You must be signed in to change notification settings - Fork 0
/
envs.py
56 lines (44 loc) · 1.76 KB
/
envs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import cv2
import gym
import numpy as np
from gym.spaces.box import Box
# Taken from https://github.com/openai/universe-starter-agent
def create_atari_env(env_id):
env = gym.make(env_id)
env = AtariRescale42x42(env)
env = NormalizedEnv(env)
return env
def _process_frame42(frame):
frame = frame[34:34 + 160, :160]
# Resize by half, then down to 42x42 (essentially mipmapping). If
# we resize directly we lose pixels that, when mapped to 42x42,
# aren't close enough to the pixel boundary.
frame = cv2.resize(frame, (80, 80))
frame = cv2.resize(frame, (42, 42))
frame = frame.mean(2, keepdims=True)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)
frame = np.moveaxis(frame, -1, 0)
return frame
class AtariRescale42x42(gym.ObservationWrapper):
def __init__(self, env=None):
super(AtariRescale42x42, self).__init__(env)
self.observation_space = Box(0.0, 1.0, [1, 42, 42])
def observation(self, observation):
return _process_frame42(observation)
class NormalizedEnv(gym.ObservationWrapper):
def __init__(self, env=None):
super(NormalizedEnv, self).__init__(env)
self.state_mean = 0
self.state_std = 0
self.alpha = 0.9999
self.num_steps = 0
def observation(self, observation):
self.num_steps += 1
self.state_mean = self.state_mean * self.alpha + \
observation.mean() * (1 - self.alpha)
self.state_std = self.state_std * self.alpha + \
observation.std() * (1 - self.alpha)
unbiased_mean = self.state_mean / (1 - pow(self.alpha, self.num_steps))
unbiased_std = self.state_std / (1 - pow(self.alpha, self.num_steps))
return (observation - unbiased_mean) / (unbiased_std + 1e-8)