Commit

Merge pull request #3215 from saihv/PR/airgym
Gym environments and stable-baselines integration for RL
zimmy87 authored Dec 17, 2020
2 parents 8a4029a + 700a00b commit e671c4a
Showing 11 changed files with 636 additions and 1,128 deletions.
536 changes: 0 additions & 536 deletions PythonClient/car/DQNcar.py

This file was deleted.

545 changes: 0 additions & 545 deletions PythonClient/multirotor/DQNdrone.py

This file was deleted.

9 changes: 9 additions & 0 deletions PythonClient/reinforcement_learning/airgym/__init__.py
@@ -0,0 +1,9 @@
from gym.envs.registration import register

register(
    id="airsim-drone-sample-v0", entry_point="airgym.envs:AirSimDroneEnv",
)

register(
    id="airsim-car-sample-v0", entry_point="airgym.envs:AirSimCarEnv",
)
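
Registering these ids lets training code build the environments through gym.make, with keyword arguments forwarded to the env constructors defined below. A minimal usage sketch, assuming an AirSim simulation is reachable at 127.0.0.1; the step_length and image_shape values are illustrative, not prescribed by this file:

import gym
import airgym  # importing the package runs the register() calls above

# Keyword arguments are passed through to AirSimDroneEnv.__init__ (see drone_env.py below).
env = gym.make(
    "airgym:airsim-drone-sample-v0",
    ip_address="127.0.0.1",
    step_length=0.25,
    image_shape=(84, 84, 1),
)
obs = env.reset()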
4 changes: 4 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/__init__.py
@@ -0,0 +1,4 @@
from airgym.envs.airsim_env import AirSimEnv
from airgym.envs.car_env import AirSimCarEnv
from airgym.envs.drone_env import AirSimDroneEnv

31 changes: 31 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/airsim_env.py
@@ -0,0 +1,31 @@
import numpy as np
import airsim

import gym
from gym import spaces


class AirSimEnv(gym.Env):
    metadata = {"render.modes": ["rgb_array"]}

    def __init__(self, image_shape):
        self.observation_space = spaces.Box(0, 255, shape=image_shape, dtype=np.uint8)
        self.viewer = None

    def __del__(self):
        raise NotImplementedError()

    def _get_obs(self):
        raise NotImplementedError()

    def _compute_reward(self):
        raise NotImplementedError()

    def close(self):
        raise NotImplementedError()

    def step(self, action):
        raise NotImplementedError()

    def render(self):
        return self._get_obs()
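
The base class leaves _get_obs, _compute_reward, close, step, and __del__ unimplemented; the car and drone environments below fill them in, after which any such subclass can be driven by the usual Gym loop. A minimal sketch of that loop with a random policy, purely for illustration:

def run_random_episodes(env, n_episodes=3):
    # Works with any concrete subclass of AirSimEnv (e.g. AirSimCarEnv, AirSimDroneEnv below).
    for episode in range(n_episodes):
        obs = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = env.action_space.sample()  # random action, illustration only
            obs, reward, done, info = env.step(action)
            total_reward += reward
        print(f"episode {episode}: total reward {total_reward:.2f}")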
153 changes: 153 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/car_env.py
@@ -0,0 +1,153 @@
import setup_path
import airsim
import numpy as np
import math
import time

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimCarEnv(AirSimEnv):
    def __init__(self, ip_address, image_shape):
        super().__init__(image_shape)

        self.image_shape = image_shape
        self.start_ts = 0

        self.state = {
            "position": np.zeros(3),
            "collision": False,
            "prev_position": np.zeros(3),
        }

        self.car = airsim.CarClient(ip=ip_address)
        self.action_space = spaces.Discrete(6)

        self.image_request = airsim.ImageRequest(
            "0", airsim.ImageType.DepthPerspective, True, False
        )

        self.car_controls = airsim.CarControls()
        self.car_state = None

        self.state["pose"] = None
        self.state["prev_pose"] = None
        self.state["collision"] = None

    def _setup_car(self):
        self.car.reset()
        self.car.enableApiControl(True)
        self.car.armDisarm(True)
        time.sleep(0.01)

    def __del__(self):
        self.car.reset()

    def _do_action(self, action):
        self.car_controls.brake = 0
        self.car_controls.throttle = 1
        if action == 0:
            self.car_controls.throttle = 0
            self.car_controls.brake = 1
        elif action == 1:
            self.car_controls.steering = 0
        elif action == 2:
            self.car_controls.steering = 0.5
        elif action == 3:
            self.car_controls.steering = -0.5
        elif action == 4:
            self.car_controls.steering = 0.25
        else:
            self.car_controls.steering = -0.25

        self.car.setCarControls(self.car_controls)
        time.sleep(1)

    def transform_obs(self, responses):
        img1d = np.array(responses[0].image_data_float, dtype=np.float64)
        img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
        img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

        from PIL import Image

        image = Image.fromarray(img2d)
        im_final = np.array(image.resize((84, 84)).convert("L"))

        return im_final.reshape([84, 84, 1])

    def _get_obs(self):
        responses = self.car.simGetImages([self.image_request])
        image = self.transform_obs(responses)

        self.car_state = self.car.getCarState()
        collision = self.car.simGetCollisionInfo().has_collided

        self.state["prev_pose"] = self.state["pose"]
        self.state["pose"] = self.car_state.kinematics_estimated
        self.state["collision"] = collision

        return image

    def _compute_reward(self):
        MAX_SPEED = 300
        MIN_SPEED = 10
        thresh_dist = 3.5
        beta = 3

        z = 0
        pts = [
            np.array([0, -1, z]),
            np.array([130, -1, z]),
            np.array([130, 125, z]),
            np.array([0, 125, z]),
            np.array([0, -1, z]),
            np.array([130, -1, z]),
            np.array([130, -128, z]),
            np.array([0, -128, z]),
            np.array([0, -1, z]),
        ]
        pd = self.state["pose"].position
        car_pt = np.array([pd.x_val, pd.y_val, pd.z_val])

        dist = 10000000
        for i in range(0, len(pts) - 1):
            dist = min(
                dist,
                np.linalg.norm(np.cross((car_pt - pts[i]), (car_pt - pts[i + 1])))
                / np.linalg.norm(pts[i] - pts[i + 1]),
            )

        # print(dist)
        if dist > thresh_dist:
            reward = -3
        else:
            reward_dist = math.exp(-beta * dist) - 0.5
            reward_speed = (
                (self.car_state.speed - MIN_SPEED) / (MAX_SPEED - MIN_SPEED)
            ) - 0.5
            reward = reward_dist + reward_speed

        done = 0
        if reward < -1:
            done = 1
        if self.car_controls.brake == 0:
            if self.car_state.speed <= 1:
                done = 1
        if self.state["collision"]:
            done = 1

        return reward, done

    def step(self, action):
        self._do_action(action)
        obs = self._get_obs()
        reward, done = self._compute_reward()

        return obs, reward, done, self.state

    def reset(self):
        self._setup_car()
        self._do_action(1)
        return self._get_obs()
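
The commit message mentions stable-baselines integration for these environments. A minimal sketch of how the registered car environment might be trained with stable-baselines3's DQN, assuming stable-baselines3 is installed and an AirSim car simulation is running locally; the hyperparameters and save path are illustrative and not taken from this PR:

import gym
import airgym  # registers airsim-car-sample-v0

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecTransposeImage

# Wrap the registered env; the kwargs mirror AirSimCarEnv.__init__ above.
env = DummyVecEnv([
    lambda: gym.make(
        "airgym:airsim-car-sample-v0",
        ip_address="127.0.0.1",
        image_shape=(84, 84, 1),
    )
])
# The CNN policy expects channel-first images, so transpose the (84, 84, 1) observations.
env = VecTransposeImage(env)

model = DQN("CnnPolicy", env, buffer_size=200_000, learning_starts=1_000, verbose=1)
model.learn(total_timesteps=100_000)
model.save("dqn_airsim_car")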
164 changes: 164 additions & 0 deletions PythonClient/reinforcement_learning/airgym/envs/drone_env.py
@@ -0,0 +1,164 @@
import setup_path
import airsim
import numpy as np
import math
import time
from argparse import ArgumentParser

import gym
from gym import spaces
from airgym.envs.airsim_env import AirSimEnv


class AirSimDroneEnv(AirSimEnv):
    def __init__(self, ip_address, step_length, image_shape):
        super().__init__(image_shape)
        self.step_length = step_length
        self.image_shape = image_shape

        self.state = {
            "position": np.zeros(3),
            "collision": False,
            "prev_position": np.zeros(3),
        }

        self.drone = airsim.MultirotorClient(ip=ip_address)
        self.action_space = spaces.Discrete(7)
        self._setup_flight()

        self.image_request = airsim.ImageRequest(
            3, airsim.ImageType.DepthPerspective, True, False
        )

    def __del__(self):
        self.drone.reset()

    def _setup_flight(self):
        self.drone.reset()
        self.drone.enableApiControl(True)
        self.drone.armDisarm(True)

        # Set home position and velocity
        self.drone.moveToPositionAsync(-0.55265, -31.9786, -19.0225, 10).join()
        self.drone.moveByVelocityAsync(1, -0.67, -0.8, 5).join()

    def transform_obs(self, responses):
        img1d = np.array(responses[0].image_data_float, dtype=np.float64)
        img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
        img2d = np.reshape(img1d, (responses[0].height, responses[0].width))

        from PIL import Image

        image = Image.fromarray(img2d)
        im_final = np.array(image.resize((84, 84)).convert("L"))

        return im_final.reshape([84, 84, 1])

    def _get_obs(self):
        responses = self.drone.simGetImages([self.image_request])
        image = self.transform_obs(responses)
        self.drone_state = self.drone.getMultirotorState()

        self.state["prev_position"] = self.state["position"]
        self.state["position"] = self.drone_state.kinematics_estimated.position
        self.state["velocity"] = self.drone_state.kinematics_estimated.linear_velocity

        collision = self.drone.simGetCollisionInfo().has_collided
        self.state["collision"] = collision

        return image

    def _do_action(self, action):
        quad_offset = self.interpret_action(action)
        quad_vel = self.drone.getMultirotorState().kinematics_estimated.linear_velocity
        self.drone.moveByVelocityAsync(
            quad_vel.x_val + quad_offset[0],
            quad_vel.y_val + quad_offset[1],
            quad_vel.z_val + quad_offset[2],
            5,
        ).join()

    def _compute_reward(self):
        thresh_dist = 7
        beta = 1

        z = -10
        pts = [
            np.array([-0.55265, -31.9786, -19.0225]),
            np.array([48.59735, -63.3286, -60.07256]),
            np.array([193.5974, -55.0786, -46.32256]),
            np.array([369.2474, 35.32137, -62.5725]),
            np.array([541.3474, 143.6714, -32.07256]),
        ]

        quad_pt = np.array(
            [
                self.state["position"].x_val,
                self.state["position"].y_val,
                self.state["position"].z_val,
            ]
        )

        if self.state["collision"]:
            reward = -100
        else:
            dist = 10000000
            for i in range(0, len(pts) - 1):
                dist = min(
                    dist,
                    np.linalg.norm(np.cross((quad_pt - pts[i]), (quad_pt - pts[i + 1])))
                    / np.linalg.norm(pts[i] - pts[i + 1]),
                )

            if dist > thresh_dist:
                reward = -10
            else:
                reward_dist = math.exp(-beta * dist) - 0.5
                reward_speed = (
                    np.linalg.norm(
                        [
                            self.state["velocity"].x_val,
                            self.state["velocity"].y_val,
                            self.state["velocity"].z_val,
                        ]
                    )
                    - 0.5
                )
                reward = reward_dist + reward_speed

        done = 0
        if reward <= -10:
            done = 1

        return reward, done

    def step(self, action):
        self._do_action(action)
        obs = self._get_obs()
        reward, done = self._compute_reward()

        return obs, reward, done, self.state

    def reset(self):
        self._setup_flight()
        return self._get_obs()

    def interpret_action(self, action):
        if action == 0:
            quad_offset = (self.step_length, 0, 0)
        elif action == 1:
            quad_offset = (0, self.step_length, 0)
        elif action == 2:
            quad_offset = (0, 0, self.step_length)
        elif action == 3:
            quad_offset = (-self.step_length, 0, 0)
        elif action == 4:
            quad_offset = (0, -self.step_length, 0)
        elif action == 5:
            quad_offset = (0, 0, -self.step_length)
        else:
            quad_offset = (0, 0, 0)

        return quad_offset
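
Both _compute_reward methods above score path-following as the smallest perpendicular distance from the vehicle to the lines through consecutive waypoints, via the cross-product identity |(p - a) x (p - b)| / |b - a|. A small standalone check of that expression with made-up coordinates; note it measures distance to the infinite line through each waypoint pair, not to the clipped segment, which matches the code above:

import numpy as np

def line_distance(p, a, b):
    # Perpendicular distance from point p to the line through a and b,
    # the same expression used inside the reward loops above.
    return np.linalg.norm(np.cross(p - a, p - b)) / np.linalg.norm(a - b)

# A point 2 units off a segment lying along the x-axis.
p = np.array([5.0, 2.0, 0.0])
a = np.array([0.0, 0.0, 0.0])
b = np.array([10.0, 0.0, 0.0])
print(line_distance(p, a, b))  # 2.0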