Merge pull request #3215 from saihv/PR/airgym
Gym environments and stable-baselines integration for RL
Showing 11 changed files with 636 additions and 1,128 deletions.
Two files were deleted in this change; their contents are not shown.
PythonClient/reinforcement_learning/airgym/__init__.py (9 additions)
from gym.envs.registration import register

register(
    id="airsim-drone-sample-v0", entry_point="airgym.envs:AirSimDroneEnv",
)

register(
    id="airsim-car-sample-v0", entry_point="airgym.envs:AirSimCarEnv",
)
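
Importing the package runs these register() calls, after which both ids are available through gym.make, which forwards keyword arguments to the environment constructors. A minimal usage sketch, assuming an AirSim simulator is already running locally; the step length below is an illustrative value, not a default from this PR:

import gym
import airgym  # noqa: F401 -- importing the package runs the register() calls above

# ip_address, step_length, and image_shape are forwarded to AirSimDroneEnv.__init__
env = gym.make(
    "airsim-drone-sample-v0",
    ip_address="127.0.0.1",
    step_length=0.25,
    image_shape=(84, 84, 1),
)
obs = env.reset()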
PythonClient/reinforcement_learning/airgym/envs/__init__.py (4 additions)
from airgym.envs.airsim_env import AirSimEnv
from airgym.envs.car_env import AirSimCarEnv
from airgym.envs.drone_env import AirSimDroneEnv
PythonClient/reinforcement_learning/airgym/envs/airsim_env.py (31 additions, 0 deletions)
import numpy as np
import airsim

import gym
from gym import spaces


class AirSimEnv(gym.Env):
    # Common base: observations are 8-bit images; subclasses supply the rest.
    metadata = {"render.modes": ["rgb_array"]}

    def __init__(self, image_shape):
        self.observation_space = spaces.Box(0, 255, shape=image_shape, dtype=np.uint8)
        self.viewer = None

    def __del__(self):
        raise NotImplementedError()

    def _get_obs(self):
        raise NotImplementedError()

    def _compute_reward(self):
        raise NotImplementedError()

    def close(self):
        raise NotImplementedError()

    def step(self, action):
        raise NotImplementedError()

    def render(self):
        return self._get_obs()
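
AirSimEnv pins the observation space to an 8-bit image Box and leaves observation, reward, stepping, and cleanup to subclasses; render() simply returns the latest observation. A standalone check of what that Box accepts (the 84x84x1 shape mirrors what the subclasses below produce):

import numpy as np
from gym import spaces

obs_space = spaces.Box(0, 255, shape=(84, 84, 1), dtype=np.uint8)
sample = obs_space.sample()        # a random uint8 image
assert sample.shape == (84, 84, 1)
assert obs_space.contains(sample)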
PythonClient/reinforcement_learning/airgym/envs/car_env.py (153 additions, 0 deletions)
import setup_path
import airsim
import numpy as np
import math
import time

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimCarEnv(AirSimEnv):
    def __init__(self, ip_address, image_shape):
        super().__init__(image_shape)

        self.image_shape = image_shape
        self.start_ts = 0

        self.state = {
            "position": np.zeros(3),
            "collision": False,
            "prev_position": np.zeros(3),
        }

        self.car = airsim.CarClient(ip=ip_address)
        self.action_space = spaces.Discrete(6)

        self.image_request = airsim.ImageRequest(
            "0", airsim.ImageType.DepthPerspective, True, False
        )

        self.car_controls = airsim.CarControls()
        self.car_state = None

        self.state["pose"] = None
        self.state["prev_pose"] = None
        self.state["collision"] = None

    def _setup_car(self):
        self.car.reset()
        self.car.enableApiControl(True)
        self.car.armDisarm(True)
        time.sleep(0.01)

    def __del__(self):
        self.car.reset()

    def _do_action(self, action):
        # Discrete actions: 0 brakes; 1-5 steer at full throttle.
        self.car_controls.brake = 0
        self.car_controls.throttle = 1
        if action == 0:
            self.car_controls.throttle = 0
            self.car_controls.brake = 1
        elif action == 1:
            self.car_controls.steering = 0
        elif action == 2:
            self.car_controls.steering = 0.5
        elif action == 3:
            self.car_controls.steering = -0.5
        elif action == 4:
            self.car_controls.steering = 0.25
        else:
            self.car_controls.steering = -0.25

        self.car.setCarControls(self.car_controls)
        # Let the action take effect in the simulator before observing.
        time.sleep(1)

    def transform_obs(self, responses):
        # Convert the float depth image to an inverted 8-bit range,
        # then resize to the 84x84x1 grayscale observation.
        img1d = np.array(responses[0].image_data_float, dtype=np.float64)
        img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
        img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

        from PIL import Image

        image = Image.fromarray(img2d)
        im_final = np.array(image.resize((84, 84)).convert("L"))

        return im_final.reshape([84, 84, 1])

    def _get_obs(self):
        responses = self.car.simGetImages([self.image_request])
        image = self.transform_obs(responses)

        self.car_state = self.car.getCarState()
        collision = self.car.simGetCollisionInfo().has_collided

        self.state["prev_pose"] = self.state["pose"]
        self.state["pose"] = self.car_state.kinematics_estimated
        self.state["collision"] = collision

        return image

    def _compute_reward(self):
        MAX_SPEED = 300
        MIN_SPEED = 10
        thresh_dist = 3.5
        beta = 3

        # Waypoints tracing the target route on the track.
        z = 0
        pts = [
            np.array([0, -1, z]),
            np.array([130, -1, z]),
            np.array([130, 125, z]),
            np.array([0, 125, z]),
            np.array([0, -1, z]),
            np.array([130, -1, z]),
            np.array([130, -128, z]),
            np.array([0, -128, z]),
            np.array([0, -1, z]),
        ]
        pd = self.state["pose"].position
        car_pt = np.array([pd.x_val, pd.y_val, pd.z_val])

        # Perpendicular distance to the nearest route segment.
        dist = 10000000
        for i in range(0, len(pts) - 1):
            dist = min(
                dist,
                np.linalg.norm(np.cross((car_pt - pts[i]), (car_pt - pts[i + 1])))
                / np.linalg.norm(pts[i] - pts[i + 1]),
            )

        if dist > thresh_dist:
            reward = -3
        else:
            reward_dist = math.exp(-beta * dist) - 0.5
            reward_speed = (
                (self.car_state.speed - MIN_SPEED) / (MAX_SPEED - MIN_SPEED)
            ) - 0.5
            reward = reward_dist + reward_speed

        # End the episode on low reward, stalling without braking, or collision.
        done = 0
        if reward < -1:
            done = 1
        if self.car_controls.brake == 0:
            if self.car_state.speed <= 1:
                done = 1
        if self.state["collision"]:
            done = 1

        return reward, done

    def step(self, action):
        self._do_action(action)
        obs = self._get_obs()
        reward, done = self._compute_reward()

        return obs, reward, done, self.state

    def reset(self):
        self._setup_car()
        # Start by driving straight ahead.
        self._do_action(1)
        return self._get_obs()
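
Both environments score path-following with the same identity: the perpendicular distance from a point p to the line through waypoints a and b is |(p - a) x (p - b)| / |b - a|. Note this measures distance to the infinite line rather than the clipped segment, so the code takes the minimum over all consecutive waypoint pairs. A standalone check with hypothetical coordinates:

import numpy as np

a = np.array([0.0, 0.0, 0.0])   # segment start
b = np.array([10.0, 0.0, 0.0])  # segment end
p = np.array([5.0, 3.0, 0.0])   # vehicle position

# |(p - a) x (p - b)| / |b - a|
dist = np.linalg.norm(np.cross(p - a, p - b)) / np.linalg.norm(b - a)
print(dist)  # 3.0 -- the perpendicular offset from the line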
PythonClient/reinforcement_learning/airgym/envs/drone_env.py (164 additions, 0 deletions)
import setup_path
import airsim
import numpy as np
import math
import time
from argparse import ArgumentParser

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimDroneEnv(AirSimEnv):
    def __init__(self, ip_address, step_length, image_shape):
        super().__init__(image_shape)
        self.step_length = step_length
        self.image_shape = image_shape

        self.state = {
            "position": np.zeros(3),
            "collision": False,
            "prev_position": np.zeros(3),
        }

        self.drone = airsim.MultirotorClient(ip=ip_address)
        self.action_space = spaces.Discrete(7)
        self._setup_flight()

        self.image_request = airsim.ImageRequest(
            3, airsim.ImageType.DepthPerspective, True, False
        )

    def __del__(self):
        self.drone.reset()

    def _setup_flight(self):
        self.drone.reset()
        self.drone.enableApiControl(True)
        self.drone.armDisarm(True)

        # Set home position and velocity
        self.drone.moveToPositionAsync(-0.55265, -31.9786, -19.0225, 10).join()
        self.drone.moveByVelocityAsync(1, -0.67, -0.8, 5).join()

    def transform_obs(self, responses):
        # Convert the float depth image to an inverted 8-bit range,
        # then resize to the 84x84x1 grayscale observation.
        img1d = np.array(responses[0].image_data_float, dtype=np.float64)
        img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
        img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

        from PIL import Image

        image = Image.fromarray(img2d)
        im_final = np.array(image.resize((84, 84)).convert("L"))

        return im_final.reshape([84, 84, 1])

    def _get_obs(self):
        responses = self.drone.simGetImages([self.image_request])
        image = self.transform_obs(responses)
        self.drone_state = self.drone.getMultirotorState()

        self.state["prev_position"] = self.state["position"]
        self.state["position"] = self.drone_state.kinematics_estimated.position
        self.state["velocity"] = self.drone_state.kinematics_estimated.linear_velocity

        collision = self.drone.simGetCollisionInfo().has_collided
        self.state["collision"] = collision

        return image

    def _do_action(self, action):
        # Offset the current velocity along one axis and fly for 5 seconds.
        quad_offset = self.interpret_action(action)
        quad_vel = self.drone.getMultirotorState().kinematics_estimated.linear_velocity
        self.drone.moveByVelocityAsync(
            quad_vel.x_val + quad_offset[0],
            quad_vel.y_val + quad_offset[1],
            quad_vel.z_val + quad_offset[2],
            5,
        ).join()

    def _compute_reward(self):
        thresh_dist = 7
        beta = 1

        # Waypoints tracing the target flight corridor.
        z = -10
        pts = [
            np.array([-0.55265, -31.9786, -19.0225]),
            np.array([48.59735, -63.3286, -60.07256]),
            np.array([193.5974, -55.0786, -46.32256]),
            np.array([369.2474, 35.32137, -62.5725]),
            np.array([541.3474, 143.6714, -32.07256]),
        ]

        quad_pt = np.array(
            [
                self.state["position"].x_val,
                self.state["position"].y_val,
                self.state["position"].z_val,
            ]
        )

        if self.state["collision"]:
            reward = -100
        else:
            # Perpendicular distance to the nearest corridor segment.
            dist = 10000000
            for i in range(0, len(pts) - 1):
                dist = min(
                    dist,
                    np.linalg.norm(np.cross((quad_pt - pts[i]), (quad_pt - pts[i + 1])))
                    / np.linalg.norm(pts[i] - pts[i + 1]),
                )

            if dist > thresh_dist:
                reward = -10
            else:
                reward_dist = math.exp(-beta * dist) - 0.5
                reward_speed = (
                    np.linalg.norm(
                        [
                            self.state["velocity"].x_val,
                            self.state["velocity"].y_val,
                            self.state["velocity"].z_val,
                        ]
                    )
                    - 0.5
                )
                reward = reward_dist + reward_speed

        done = 0
        if reward <= -10:
            done = 1

        return reward, done

    def step(self, action):
        self._do_action(action)
        obs = self._get_obs()
        reward, done = self._compute_reward()

        return obs, reward, done, self.state

    def reset(self):
        self._setup_flight()
        return self._get_obs()

    def interpret_action(self, action):
        # Map the 7 discrete actions to velocity offsets along +/-x, +/-y, +/-z, or hover.
        if action == 0:
            quad_offset = (self.step_length, 0, 0)
        elif action == 1:
            quad_offset = (0, self.step_length, 0)
        elif action == 2:
            quad_offset = (0, 0, self.step_length)
        elif action == 3:
            quad_offset = (-self.step_length, 0, 0)
        elif action == 4:
            quad_offset = (0, -self.step_length, 0)
        elif action == 5:
            quad_offset = (0, 0, -self.step_length)
        else:
            quad_offset = (0, 0, 0)

        return quad_offset
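
The PR title also mentions stable-baselines integration; that side of the change lives in files not shown on this page. As a hedged sketch of how these environments plug into a learner, assuming stable-baselines3 and its DQN implementation (the hyperparameters below are illustrative, not taken from this PR):

import gym
import airgym  # noqa: F401 -- registers the airsim-* environment ids
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# A single-env vector wrapper around the car environment.
env = DummyVecEnv(
    [
        lambda: gym.make(
            "airsim-car-sample-v0",
            ip_address="127.0.0.1",
            image_shape=(84, 84, 1),
        )
    ]
)

# Illustrative hyperparameters; CnnPolicy suits the 84x84x1 uint8 observations.
model = DQN("CnnPolicy", env, buffer_size=50_000, learning_starts=1_000, verbose=1)
model.learn(total_timesteps=100_000)
model.save("dqn_airsim_car")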