UnityEnv.py (forked from jsztompka/MultiAgent-PPO)
import numpy as np
from unityagents import UnityEnvironment


class UnityEnv:
    """A wrapper around UnityEnvironment.

    Its main purpose is to expose the common reset/step interface
    that most RL environments provide.
    """

    def __init__(self, env_path, train_mode=True):
        self.brain = None
        self.brain_name = None
        self.train_mode = train_mode
        self.env = self.create_unity_env(env_path)

        # Environment details
        self.action_space = self.brain.vector_action_space_size
        self.observation_space = self.brain.vector_observation_space_size
        print(f'Action space {self.action_space}')
        print(f'State space {self.observation_space}')

        # Backwards compatibility with code that expects Gym-style attributes
        self.action_dim = self.action_space
        self.state_dim = int(np.prod(self.observation_space))

    def extract_env_details(self, env_info):
        next_state = env_info.vector_observations  # get the next state
        reward = env_info.rewards                  # get the reward
        done = env_info.local_done                 # see if the episode has finished
        return next_state, reward, done

    def create_unity_env(self, env_path):
        env = UnityEnvironment(file_name=env_path)
        self.brain_name = env.brain_names[0]
        self.brain = env.brains[self.brain_name]
        return env

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        return self.extract_env_details(env_info)[0]

    def step(self, actions):
        # Clip actions to the [-1, 1] range the Unity environment expects,
        # then step the environment exactly once per call
        actions = np.clip(actions, -1, 1)
        env_info = self.env.step(actions)[self.brain_name]
        next_states, rewards, dones = self.extract_env_details(env_info)
        return next_states, rewards, np.array(dones)
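
# A minimal usage sketch: drive the wrapper with random actions to show the
# reset/step interface. The build path './Tennis.x86_64', the number of steps,
# and the random policy are assumptions for illustration; point env_path at
# whatever Unity ML-Agents build you actually have.
if __name__ == '__main__':
    env = UnityEnv('./Tennis.x86_64', train_mode=True)

    states = env.reset()
    num_agents = states.shape[0]  # one row of observations per agent
    scores = np.zeros(num_agents)

    for _ in range(100):
        # Random actions in [-1, 1], one action vector per agent
        actions = np.random.uniform(-1, 1, (num_agents, env.action_dim))
        next_states, rewards, dones = env.step(actions)
        scores += rewards
        states = next_states
        if np.any(dones):
            break

    print(f'Scores per agent: {scores}')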