Skip to content

Commit

Permalink
Revised reward, terminated, truncated in RL aviaries
Browse files Browse the repository at this point in the history
  • Loading branch information
JacopoPan committed Nov 19, 2023
1 parent 0527532 commit 85a726d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 25 deletions.
20 changes: 12 additions & 8 deletions gym_pybullet_drones/envs/HoverAviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(self,
The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
"""
self.target_pos = np.array([0,0,1])
super().__init__(drone_model=drone_model,
num_drones=1,
initial_xyzs=initial_xyzs,
Expand All @@ -73,7 +74,8 @@ def _computeReward(self):
"""
state = self._getDroneStateVector(0)
return -1 * np.linalg.norm(np.array([0, 0, 1])-state[0:3])**2
ret = max(0, 500 - np.linalg.norm(self.target_pos-state[0:3])**2)
return ret

################################################################################

Expand All @@ -86,25 +88,27 @@ def _computeTerminated(self):
Whether the current episode is done.
"""
if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
state = self._getDroneStateVector(0)
if np.linalg.norm(self.target_pos-state[0:3]) < .001:
return True
else:
return False

################################################################################

def _computeTruncated(self):
"""Computes the current truncated value(s).
Unused in this implementation.
"""Computes the current truncated value.
Returns
-------
bool
Always false.
Whether the current episode timed out.
"""
return False
if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
return True
else:
return False

################################################################################

Expand Down Expand Up @@ -139,7 +143,7 @@ def _clipAndNormalizeState(self,
(20,)-shaped array of floats containing the normalized state of a single drone.
"""
MAX_LIN_VEL_XY = 3
MAX_LIN_VEL_XY = 3
MAX_LIN_VEL_Z = 1

MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
Expand Down
36 changes: 19 additions & 17 deletions gym_pybullet_drones/envs/LeaderFollowerAviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self,
The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
"""
self.target_pos = np.array([0,0,1])
super().__init__(drone_model=drone_model,
num_drones=num_drones,
neighbourhood_radius=neighbourhood_radius,
Expand Down Expand Up @@ -81,11 +82,10 @@ def _computeReward(self):
"""
rewards = np.zeros(self.NUM_DRONES)
states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2
# rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
# for i in range(1, self.NUM_DRONES):
# rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2)
return rewards[0] #TODO: return multiple rewards
ret = max(0, 500 - np.linalg.norm(self.target_pos-states[0, 0:3])**2)
for i in range(1, self.NUM_DRONES):
ret += max(0, 100 - np.linalg.norm(states[i-1, 3]-states[i, 3])**2)
return ret

################################################################################

Expand All @@ -98,25 +98,27 @@ def _computeTerminated(self):
Whether the current episode is done.
"""
bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
# done = {i: bool_val for i in range(self.NUM_DRONES)}
# done["__all__"] = bool_val # True if True in done.values() else False
return bool_val #TODO: return multiple terminatation values
state = self._getDroneStateVector(0)
if np.linalg.norm(self.target_pos-state[0:3]) < .001:
return True
else:
return False

################################################################################

def _computeTruncated(self):
"""Computes the current truncated value(s).
Unused in this implementation.
"""Computes the current truncated value.
Returns
-------
bool
Always false.
Whether the current episode timed out.
"""
return False
if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC:
return True
else:
return False

################################################################################

Expand All @@ -127,11 +129,11 @@ def _computeInfo(self):
Returns
-------
dict[int, dict[]]
Dictionary of empty dictionaries.
dict[str, int]
Dummy value.
"""
return {i: {} for i in range(self.NUM_DRONES)}
return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

################################################################################

Expand Down

0 comments on commit 85a726d

Please sign in to comment.