From d1271707639e49b3bf1f56a6fa49b06d10ddd1cf Mon Sep 17 00:00:00 2001 From: L-ED Date: Sun, 24 Mar 2024 10:14:19 +0300 Subject: [PATCH] added flight --- .../envs/single_agent_rl/__init__.py | 1 + .../single_agent_rl/flight/FlightFullState.py | 29 ++-- .../envs/single_agent_rl/flight/FlightGPS.py | 153 ------------------ .../envs/single_agent_rl/flight/FlightIMU.py | 144 ----------------- .../envs/single_agent_rl/flight/__init__.py | 4 +- .../examples/flight/flight_learn.py | 75 +++++++++ .../examples/flight/flight_test.py | 55 +++++++ .../{hover_rl.py => hover/hover_learn.py} | 2 +- .../examples/hover/hover_test.py | 55 +++++++ 9 files changed, 201 insertions(+), 317 deletions(-) delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/flight/FlightGPS.py delete mode 100644 gym_pybullet_drones/envs/single_agent_rl/flight/FlightIMU.py create mode 100644 gym_pybullet_drones/examples/flight/flight_learn.py create mode 100644 gym_pybullet_drones/examples/flight/flight_test.py rename gym_pybullet_drones/examples/{hover_rl.py => hover/hover_learn.py} (94%) create mode 100644 gym_pybullet_drones/examples/hover/hover_test.py diff --git a/gym_pybullet_drones/envs/single_agent_rl/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/__init__.py index 0182891..4593dfb 100644 --- a/gym_pybullet_drones/envs/single_agent_rl/__init__.py +++ b/gym_pybullet_drones/envs/single_agent_rl/__init__.py @@ -5,3 +5,4 @@ from gym_pybullet_drones.envs.single_agent_rl.BaseRL import BaseRL from gym_pybullet_drones.envs.single_agent_rl.hover import HoverIMU, HoverGPS, HoverFullState +from gym_pybullet_drones.envs.single_agent_rl.flight import FlightFullState#, FlightGPS, FlightIMU diff --git a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py b/gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py index 7bb9e7d..9c0bc81 100644 --- a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py +++ b/gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py @@ -27,7 +27,7 @@ def __init__( self.max_ang_vel = 2 #10 self.max_radius = 1 - self.target_pos = np.array([0, 0, 1]) + self.target_pos = np.array([10, 0, 1]) self.randomize = True if client is None: @@ -39,8 +39,7 @@ def __init__( if drone is None: sensors= [ - FullState(1e3), - mpu6000() + FullState(500) ] state = State() @@ -60,8 +59,8 @@ def __init__( def normalize_observation_space(self): self.observation_space = spaces.Box( - low=-1*np.ones((1, 18)), - high=np.ones((1, 18)), + low=-1*np.ones((1, 15)), + high=np.ones((1, 15)), dtype=np.float32 ) @@ -73,17 +72,13 @@ def preprocess_action(self, action): def preprocess_observation(self, observation): pos, ang, vel, a_vel, acc, a_acc = observation['FS_0'] - imu = observation['IMU_0'] targ_disp = self.target_pos - pos stats = [ - pos, - a_vel, + pos, + ang, vel, - imu[:3], - imu[3:], - # a_acc, - # acc, + a_vel, targ_disp ] @@ -94,7 +89,7 @@ def preprocess_observation(self, observation): # value = value/value_norm # stats[i] = value - return np.concatenate(stats).reshape((1, 18)) + return np.concatenate(stats).reshape((1, 15)) def check_termination(self): @@ -102,7 +97,7 @@ def check_termination(self): term = False pos = np.copy(self.drone.state.world.pos) pos[2] -= 1 - is_out = sum(pos**2) > self.max_radius**2 + is_out = sum(pos**2) > sum(self.target_pos**2)*1.5 if is_out: term = True return term @@ -132,7 +127,7 @@ def reward(self): state = deepcopy(self.drone.state) - disp = np.array([0, 0, 1]) - state.world.pos + disp = self.target_pos - state.world.pos displ_dir = 
disp/np.linalg.norm(disp) displ_normalized = np.sum(disp**2)/(safe_radius)**2 @@ -150,9 +145,9 @@ def reward(self): # if dir_reward < 0: # dir_reward *= np.linalg.norm(vel) - angles_reward = 0#-np.linalg.norm(state.world.ang_vel) + # angles_reward = -np.linalg.norm(state.world.ang_vel) - reward = dir_reward + angles_reward + closenes_reward + reward = dir_reward #+ closenes_reward # reward = (1-displ_normalized)#dir_reward + angles_reward + closenes_reward return reward diff --git a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightGPS.py b/gym_pybullet_drones/envs/single_agent_rl/flight/FlightGPS.py deleted file mode 100644 index 99c2d32..0000000 --- a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightGPS.py +++ /dev/null @@ -1,153 +0,0 @@ -from gym_pybullet_drones.envs.single_agent_rl import BaseRL -from gymnasium import spaces -import numpy as np -import pybullet as pb - -from gym_pybullet_drones.vehicles import QuadCopter -from gym_pybullet_drones.devices import Camera, mpu6000, Barometer, GPS -from gym_pybullet_drones.utils.state import State -from torch import sigmoid -from copy import deepcopy - -class FlightGPS(BaseRL): - - def __init__( - self, - client=None, - drone=None, - control_system=None, - logger=None, - scene_objects=[], - visualize=False, - record=False, - realtime=False - ): - - self.max_g = 2*9.8 - self.max_ang_vel = 2 #10 - self.max_radius = 1 - - self.target_pos = np.array([0, 0, 1]) - - - if client is None: - if visualize: - client = pb.connect(pb.GUI) - else: - client = pb.connect(pb.DIRECT) - - if drone is None: - - sensors= [ - GPS(1e3), - mpu6000(), - # Barometer(1e3) - ] - - state = State() - state.world.pos[2] += 0.1 - - drone = QuadCopter( - client=client, - filename= 'custom.urdf',#'cf2x_cam.urdf', - # sensors = [], - sensors = sensors, - state=state - ) - - super().__init__(client, drone, control_system, logger, scene_objects, visualize, record, realtime) - - - def normalize_observation_space(self): - - self.observation_space = spaces.Box( - low=-1*np.ones((1, 9)), - high=np.ones((1, 9)), - dtype=np.float32 - ) - - - - def preprocess_action(self, action): - return self.drone.max_rpm/(1+np.exp(-action*4)) - - - def preprocess_observation(self, observation): - - imu = observation["IMU_0"] - # imu[:3] = np.clip(imu[:3], -self.max_g, self.max_g)/self.max_g - # imu[3:] = np.clip(imu[3:], -self.max_ang_vel, self.max_ang_vel)/self.max_ang_vel - - pos = observation["GPS_0"] - targ_disp = self.target_pos - pos - # targ_disp = targ_disp/np.linalg.norm(targ_disp) - # pos = pos/self.max_radius - # pos[2] = pos[2]/2 - - - return np.concatenate(( - imu, - # np.array([observation["Bar_0"]/(2*self.max_radius)]), - # pos - targ_disp#/np.linalg.norm(targ_disp) - )).reshape((1, 9)) - - - def check_termination(self): - - term = False - pos = np.copy(self.drone.state.world.pos) - pos[2] -= 1 - is_out = sum(pos**2) > self.max_radius**2 - if is_out: - term = True - return term - - - def create_initial_state(self): - state = super().create_initial_state() - new_pos = np.random.rand(3)*2 - 1 - # new_pos = np.zeros(3) - new_pos[2] = max(new_pos[2], 0.2) - state.world.pos = new_pos - return state - - - def create_initial_action(self): - return np.zeros(4) - - - def reward(self): - - # safe_radius= 0.3 - safe_radius= self.max_radius - - state = deepcopy(self.drone.state) - - disp = np.array([0, 0, 1]) - state.world.pos - displ_dir = disp/np.linalg.norm(disp) - - displ_normalized = np.sum(disp**2)/(safe_radius)**2 - - vel = state.world.vel - flight_dir = vel/np.linalg.norm(vel) 
- - vel_normalized = np.sum(vel**2)/(self.drone.max_speed/2)**2 - - closenes_reward = (1-displ_normalized)#*(1-vel_normalized) - if np.sum(disp**2)<0.3: - closenes_reward +=1 - - # print('Position: ', pos, " Velocity: ", vel) - # print('Displacement ', disp, "Disp_dir ", displ_dir) - # print("Velocity dir ", flight_dir) - - # print(flight_dir, displ_dir) - - dir_reward = np.dot(flight_dir, displ_dir) - angles_reward = -np.linalg.norm(state.world.ang_vel) - - reward = dir_reward + angles_reward + closenes_reward - # reward = (1-displ_normalized)#dir_reward + angles_reward + closenes_reward - - return reward diff --git a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightIMU.py b/gym_pybullet_drones/envs/single_agent_rl/flight/FlightIMU.py deleted file mode 100644 index 6024e69..0000000 --- a/gym_pybullet_drones/envs/single_agent_rl/flight/FlightIMU.py +++ /dev/null @@ -1,144 +0,0 @@ -from gym_pybullet_drones.envs.single_agent_rl import BaseRL -from gymnasium import spaces -import numpy as np -import pybullet as pb - -from gym_pybullet_drones.vehicles import QuadCopter -from gym_pybullet_drones.devices import Camera, mpu6000, Barometer -from gym_pybullet_drones.utils.state import State -from torch import sigmoid -from copy import deepcopy - -class FlightIMU(BaseRL): - - def __init__( - self, - client=None, - drone=None, - control_system=None, - logger=None, - scene_objects=[], - visualize=False, - record=False, - realtime=False - ): - - self.max_g = 2*9.8 - self.max_ang_vel = 2 #10 - self.max_radius = 1 - - self.target_pos = np.array([0, 0, 1]) - self.randomize = True - - - if client is None: - if visualize: - client = pb.connect(pb.GUI) - else: - client = pb.connect(pb.DIRECT) - - if drone is None: - - sensors= [ - mpu6000(), - Barometer(1e3) - ] - - state = State() - state.world.pos[2] += 0.1 - - drone = QuadCopter( - client=client, - filename= 'custom.urdf',#'cf2x_cam.urdf', - # sensors = [], - sensors = sensors, - state=state - ) - - super().__init__(client, drone, control_system, logger, scene_objects, visualize, record, realtime) - - - def normalize_observation_space(self): - - self.observation_space = spaces.Box( - low=-1*np.ones((1, 7)), - high=np.ones((1, 7)), - dtype=np.float32 - ) - - - def preprocess_action(self, action): - return self.drone.max_rpm/(1+np.exp(-action*2)) - - - def preprocess_observation(self, observation): - - imu = observation["IMU_0"] - imu[:3] = np.clip(imu[:3], -self.max_g, self.max_g)/self.max_g - imu[3:] = np.clip(imu[3:], -self.max_ang_vel, self.max_ang_vel)/self.max_ang_vel - - return np.concatenate(( - imu, - np.array([observation["Bar_0"]/(2*self.max_radius)]) - )).reshape((1, 7)) - - - def check_termination(self): - - term = False - pos = np.copy(self.drone.state.world.pos) - pos[2] -= 1 - is_out = sum(pos**2) > self.max_radius**2 - if is_out: - term = True - return term - - - def create_initial_state(self): - state = super().create_initial_state() - if self.randomize: - new_pos = np.random.rand(3) - new_pos[:2] -= 0.5 - new_pos[2] = max(new_pos[2]*2, 0.2) - else: - new_pos = np.zeros(3) - new_pos[2] = 0.2 - - state.world.pos = new_pos - return state - - - def create_initial_action(self): - return np.zeros(4) - - - def reward(self): - - safe_radius= self.max_radius - - state = deepcopy(self.drone.state) - - disp = np.array([0, 0, 1]) - state.world.pos - displ_dir = disp/np.linalg.norm(disp) - - displ_normalized = np.sum(disp**2)/(safe_radius)**2 - - vel = state.world.vel - flight_dir = vel/np.linalg.norm(vel) - - vel_normalized = 
np.sum(vel**2)/(self.drone.max_speed/2)**2 - - closenes_reward = (1-displ_normalized)#*(1-vel_normalized) - if np.sum(disp**2)<0.1: - closenes_reward +=1 - - dir_reward = np.dot(flight_dir, displ_dir) - if dir_reward < 0: - dir_reward *= np.linalg.norm(vel) - - angles_reward = 0#-np.linalg.norm(state.world.ang_vel) - - reward = dir_reward + angles_reward + closenes_reward - # reward = (1-displ_normalized)#dir_reward + angles_reward + closenes_reward - - return reward diff --git a/gym_pybullet_drones/envs/single_agent_rl/flight/__init__.py b/gym_pybullet_drones/envs/single_agent_rl/flight/__init__.py index 5317061..2badbd2 100644 --- a/gym_pybullet_drones/envs/single_agent_rl/flight/__init__.py +++ b/gym_pybullet_drones/envs/single_agent_rl/flight/__init__.py @@ -1,3 +1,3 @@ from .FlightFullState import FlightFullState -from .FlightGPS import FlightGPS -from .FlightIMU import FlightIMU \ No newline at end of file +# from .FlightGPS import FlightGPS +# from .FlightIMU import FlightIMU \ No newline at end of file diff --git a/gym_pybullet_drones/examples/flight/flight_learn.py b/gym_pybullet_drones/examples/flight/flight_learn.py new file mode 100644 index 0000000..aa5cb5c --- /dev/null +++ b/gym_pybullet_drones/examples/flight/flight_learn.py @@ -0,0 +1,75 @@ +from stable_baselines3 import PPO, SAC, TD3 +from gym_pybullet_drones.envs.single_agent_rl import FlightFullState +import time +import torch +import os +from stable_baselines3.common.callbacks import EvalCallback + + +def main(test=True): + + savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/flight' + savepath= os.path.join( + savedir, + 'best_model' + # "best_model_ppo_longlearn" + # 'best_model_ppo_nonorm_imu_BEST' + # 'best_model_ppo_nonorm' + # 'best_model_random_noize' + ) + + trainer = PPO + # trainer = SAC + + # env_class = HoverIMU + # env_class = HoverGPS + env_class = FlightFullState + policy_kwargs = dict(net_arch=dict(pi=[64, 64], qf=[64, 64])) + + env = env_class() + # env.randomize = False + agent = trainer( + 'MlpPolicy', + env=env, + verbose=1, + tensorboard_log=savedir, + # policy_kwargs=policy_kwargs + # n_steps=6000 + ) + + eval_callback = EvalCallback(env, best_model_save_path=savedir, + log_path=savedir, eval_freq=10000, + deterministic=True, render=False) + + test_only=False + # test_only=True + if not test_only: + agent.learn(1000000, callback=eval_callback) + agent.save(savepath) + env = env_class(visualize=True) + # env.randomize = False + agent = trainer.load(savepath, env=env) + + state, _=env.reset() + rew = 0 + while test: + action, _ = agent.predict( + state.reshape(1,-1), + deterministic=True + ) + + state, reward, terminated, truncated, info = env.step(action) + # print(state, reward) + msg = f"POS {state[0, :3]} VEL{state[0, 6:9]}, ACC {state[0, 12:15]}" + print(msg) + rew+=reward + + time.sleep(env.timestep) + if terminated or truncated: + print(rew) + rew=0 + state, _=env.reset() + + +if __name__=='__main__': + main() \ No newline at end of file diff --git a/gym_pybullet_drones/examples/flight/flight_test.py b/gym_pybullet_drones/examples/flight/flight_test.py new file mode 100644 index 0000000..fa40780 --- /dev/null +++ b/gym_pybullet_drones/examples/flight/flight_test.py @@ -0,0 +1,55 @@ +from stable_baselines3 import PPO, SAC, TD3 +from gym_pybullet_drones.envs.single_agent_rl import FlightFullState +import time +# import torch +import os + + +def main(test=True): + + savedir = 
'/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/flight' + savepath= os.path.join( + savedir, + 'best_model' + # "best_model_ppo_longlearn" + # 'best_model_ppo_nonorm_imu_BEST' + # 'best_model_ppo_nonorm' + # 'best_model_random_noize' + ) + + trainer = PPO + # trainer = SAC + + # env_class = HoverIMU + # env_class = HoverGPS + env_class = FlightFullState + + policy_kwargs = dict(net_arch=dict(pi=[64, 64], qf=[64, 64])) + + env = env_class(visualize=True) + env.randomize = False + agent = trainer.load(savepath, env=env) + + state, _=env.reset() + rew = 0 + while test: + action, _ = agent.predict( + state.reshape(1,-1), + deterministic=True + ) + state, reward, terminated, truncated, info = env.step(action) + # print(state, reward) + + msg = f"POS {state[0, :3]} VEL{state[0, 6:9]}, ACC {state[0, 12:15]}" + print(msg) + rew+=reward + + time.sleep(env.timestep) + if terminated or truncated: + print(rew) + rew=0 + state, _=env.reset() + + +if __name__=='__main__': + main() \ No newline at end of file diff --git a/gym_pybullet_drones/examples/hover_rl.py b/gym_pybullet_drones/examples/hover/hover_learn.py similarity index 94% rename from gym_pybullet_drones/examples/hover_rl.py rename to gym_pybullet_drones/examples/hover/hover_learn.py index be71cd6..bf4d088 100644 --- a/gym_pybullet_drones/examples/hover_rl.py +++ b/gym_pybullet_drones/examples/hover/hover_learn.py @@ -8,7 +8,7 @@ def main(test=True): - savedir = '/home/led/Simulators/Bullet/gym-pybullet-drones/gym_pybullet_drones/results' + savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover' savepath= os.path.join( savedir, 'best_model' diff --git a/gym_pybullet_drones/examples/hover/hover_test.py b/gym_pybullet_drones/examples/hover/hover_test.py new file mode 100644 index 0000000..5e5e995 --- /dev/null +++ b/gym_pybullet_drones/examples/hover/hover_test.py @@ -0,0 +1,55 @@ +from stable_baselines3 import PPO, SAC, TD3 +from gym_pybullet_drones.envs.single_agent_rl import HoverIMU, HoverGPS, HoverFullState +import time +# import torch +import os + + +def main(test=True): + + savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover' + savepath= os.path.join( + savedir, + 'best_model' + # "best_model_ppo_longlearn" + # 'best_model_ppo_nonorm_imu_BEST' + # 'best_model_ppo_nonorm' + # 'best_model_random_noize' + ) + + trainer = PPO + # trainer = SAC + + # env_class = HoverIMU + # env_class = HoverGPS + env_class = HoverFullState + + policy_kwargs = dict(net_arch=dict(pi=[64, 64], qf=[64, 64])) + + env = env_class(visualize=True) + env.randomize = False + agent = trainer.load(savepath, env=env) + + state, _=env.reset() + rew = 0 + while test: + action, _ = agent.predict( + state.reshape(1,-1), + deterministic=True + ) + state, reward, terminated, truncated, info = env.step(action) + # print(state, reward) + + msg = f"POS {state[0, :3]} VEL{state[0, 6:9]}, ACC {state[0, 12:15]}" + print(msg) + rew+=reward + + time.sleep(env.timestep) + if terminated or truncated: + print(rew) + rew=0 + state, _=env.reset() + + +if __name__=='__main__': + main() \ No newline at end of file
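
Usage note (not part of the patch): the reward that FlightFullState uses after this change reduces to the directional term alone (the closeness term is commented out). Below is a minimal standalone NumPy sketch of that term, with names (target_pos, dir_reward) mirroring the environment code above; the sample positions and velocities are illustrative values, not taken from the repository.

import numpy as np

# Mirrors FlightFullState.reward() after this patch: the reward is the cosine
# between the current velocity direction and the direction toward the target.
target_pos = np.array([10.0, 0.0, 1.0])   # same target as set in __init__ above

def dir_reward(pos, vel):
    disp = target_pos - pos                    # displacement toward the target
    displ_dir = disp / np.linalg.norm(disp)    # unit vector toward the target
    flight_dir = vel / np.linalg.norm(vel)     # unit vector along current velocity
    # As in the environment code, a zero velocity would divide by zero here.
    return float(np.dot(flight_dir, displ_dir))

# Flying roughly toward the target gives a reward near +1,
# flying directly away gives -1.
print(dir_reward(np.array([0.0, 0.0, 1.0]), np.array([1.0, 0.1, 0.0])))   # ~0.995
print(dir_reward(np.array([0.0, 0.0, 1.0]), np.array([-1.0, 0.0, 0.0])))  # -1.0

For reference when reading the printouts in the example scripts: with the 15-dimensional observation built by preprocess_observation above, indices 0:3 hold position, 3:6 attitude, 6:9 linear velocity, 9:12 angular velocity, and 12:15 the displacement to the target.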