Skip to content

Commit

Permalink
Action buffer in observations
Browse files Browse the repository at this point in the history
  • Loading branch information
JacopoPan committed Nov 19, 2023
1 parent 1b14a1a commit a693f43
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 208 deletions.
66 changes: 35 additions & 31 deletions gym_pybullet_drones/envs/BaseRLAviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import pybullet as p
from gymnasium import spaces
from collections import deque

from gym_pybullet_drones.envs.BaseAviary import BaseAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType, ImageType
Expand Down Expand Up @@ -61,6 +62,10 @@ def __init__(self,
The type of action space (1 or 3D; RPMS, thurst and torques, waypoint or velocity with PID control; etc.)
"""
#### Create a buffer for the last 10 actions ###############
self.ACTION_BUFFER_SIZE = 10
self.action_buffer = deque(maxlen=self.ACTION_BUFFER_SIZE)
####
vision_attributes = True if obs == ObservationType.RGB else False
self.OBS_TYPE = obs
self.ACT_TYPE = act
Expand Down Expand Up @@ -145,6 +150,10 @@ def _actionSpace(self):
exit()
act_lower_bound = np.array([-1*np.ones(size) for i in range(self.NUM_DRONES)])
act_upper_bound = np.array([+1*np.ones(size) for i in range(self.NUM_DRONES)])
#
for i in range(self.ACTION_BUFFER_SIZE):
self.action_buffer.append(np.zeros((self.NUM_DRONES,size)))
#
return spaces.Box(low=act_lower_bound, high=act_upper_bound, dtype=np.float32)

################################################################################
Expand Down Expand Up @@ -176,6 +185,7 @@ def _preprocessAction(self,
commanded to the 4 motors of each drone.
"""
self.action_buffer.append(action)
rpm = np.zeros((self.NUM_DRONES,4))
for k in range(action.shape[0]):
target = action[k, :]
Expand Down Expand Up @@ -246,16 +256,25 @@ def _observationSpace(self):
shape=(self.NUM_DRONES, self.IMG_RES[1], self.IMG_RES[0], 4), dtype=np.uint8)
elif self.OBS_TYPE == ObservationType.KIN:
############################################################
#### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
#### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ P0 P1 P2 P3
# obs_lower_bound = np.array([-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1])
# obs_upper_bound = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# return spaces.Box( low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32 )
############################################################
#### OBS SPACE OF SIZE 12, W/O RPMS
#### OBS SPACE OF SIZE 12
#### Observation vector ### X Y Z Q1 Q2 Q3 Q4 R P Y VX VY VZ WX WY WZ
obs_lower_bound = np.array([[-1,-1,0, -1,-1,-1, -1,-1,-1, -1,-1,-1] for i in range(self.NUM_DRONES)])
obs_upper_bound = np.array([[1,1,1, 1,1,1, 1,1,1, 1,1,1] for i in range(self.NUM_DRONES)])
lo = -np.inf
hi = np.inf
obs_lower_bound = np.array([[lo,lo,0, lo,lo,lo,lo,lo,lo,lo,lo,lo] for i in range(self.NUM_DRONES)])
obs_upper_bound = np.array([[hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi,hi] for i in range(self.NUM_DRONES)])
#### Add action buffer to observation space ################
act_lo = -1
act_hi = +1
for i in range(self.ACTION_BUFFER_SIZE):
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
elif self.ACT_TYPE==ActionType.PID:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])])
return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
############################################################
else:
Expand Down Expand Up @@ -287,33 +306,18 @@ def _computeObs(self):
)
return np.array([self.rgb[i] for i in range(self.NUM_DRONES)]).astype('float32')
elif self.OBS_TYPE == ObservationType.KIN:
############################################################
#### OBS OF SIZE 20 (WITH QUATERNION AND RPMS)
# return np.array([ self._clipAndNormalizeState(self._getDroneStateVector(i)) for i in range(self.NUM_DRONES) ]).astype('float32')
############################################################
#### OBS SPACE OF SIZE 12
obs_12 = np.zeros((self.NUM_DRONES,12))
for i in range(self.NUM_DRONES):
obs = self._clipAndNormalizeState(self._getDroneStateVector(i))
#obs = self._clipAndNormalizeState(self._getDroneStateVector(i))
obs = self._getDroneStateVector(i)
obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
return np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')
ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')
#### Add action buffer to observation #######################
for i in range(self.ACTION_BUFFER_SIZE):
ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])])
return ret
############################################################
else:
print("[ERROR] in BaseRLAviary._computeObs()")

################################################################################

def _clipAndNormalizeState(self,
state
):
"""Normalizes a drone's state to the [-1,1] range.
Must be implemented in a subclass.
Parameters
----------
state : ndarray
Array containing the non-normalized state of a single drone.
"""
raise NotImplementedError
88 changes: 0 additions & 88 deletions gym_pybullet_drones/envs/HoverAviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,91 +124,3 @@ def _computeInfo(self):
"""
return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

################################################################################

def _clipAndNormalizeState(self,
state
):
"""Normalizes a drone's state to the [-1,1] range.
Parameters
----------
state : ndarray
(20,)-shaped array of floats containing the non-normalized state of a single drone.
Returns
-------
ndarray
(20,)-shaped array of floats containing the normalized state of a single drone.
"""
MAX_LIN_VEL_XY = 3
MAX_LIN_VEL_Z = 1

MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC

MAX_PITCH_ROLL = np.pi # Full range

clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
clipped_pos_z = np.clip(state[2], 0, MAX_Z)
clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)

if self.GUI:
self._clipAndNormalizeStateWarning(state,
clipped_pos_xy,
clipped_pos_z,
clipped_rp,
clipped_vel_xy,
clipped_vel_z
)

normalized_pos_xy = clipped_pos_xy / MAX_XY
normalized_pos_z = clipped_pos_z / MAX_Z
normalized_rp = clipped_rp / MAX_PITCH_ROLL
normalized_y = state[9] / np.pi # No reason to clip
normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]

norm_and_clipped = np.hstack([normalized_pos_xy,
normalized_pos_z,
state[3:7],
normalized_rp,
normalized_y,
normalized_vel_xy,
normalized_vel_z,
normalized_ang_vel,
state[16:20]
]).reshape(20,)

return norm_and_clipped

################################################################################

def _clipAndNormalizeStateWarning(self,
state,
clipped_pos_xy,
clipped_pos_z,
clipped_rp,
clipped_vel_xy,
clipped_vel_z,
):
"""Debugging printouts associated to `_clipAndNormalizeState`.
Print a warning if values in a state vector is out of the clipping range.
"""
if not(clipped_pos_xy == np.array(state[0:2])).all():
print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
if not(clipped_pos_z == np.array(state[2])).all():
print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
if not(clipped_rp == np.array(state[7:9])).all():
print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
if not(clipped_vel_xy == np.array(state[10:12])).all():
print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
if not(clipped_vel_z == np.array(state[12])).all():
print("[WARNING] it", self.step_counter, "in HoverAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))
89 changes: 0 additions & 89 deletions gym_pybullet_drones/envs/LeaderFollowerAviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def _computeReward(self):
The reward.
"""
rewards = np.zeros(self.NUM_DRONES)
states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
ret = max(0, 500 - np.linalg.norm(self.TARGET_POS-states[0, 0:3])**2)
for i in range(1, self.NUM_DRONES):
Expand Down Expand Up @@ -134,91 +133,3 @@ def _computeInfo(self):
"""
return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

################################################################################

def _clipAndNormalizeState(self,
state
):
"""Normalizes a drone's state to the [-1,1] range.
Parameters
----------
state : ndarray
(20,)-shaped array of floats containing the non-normalized state of a single drone.
Returns
-------
ndarray
(20,)-shaped array of floats containing the normalized state of a single drone.
"""
MAX_LIN_VEL_XY = 3
MAX_LIN_VEL_Z = 1

MAX_XY = MAX_LIN_VEL_XY*self.EPISODE_LEN_SEC
MAX_Z = MAX_LIN_VEL_Z*self.EPISODE_LEN_SEC

MAX_PITCH_ROLL = np.pi # Full range

clipped_pos_xy = np.clip(state[0:2], -MAX_XY, MAX_XY)
clipped_pos_z = np.clip(state[2], 0, MAX_Z)
clipped_rp = np.clip(state[7:9], -MAX_PITCH_ROLL, MAX_PITCH_ROLL)
clipped_vel_xy = np.clip(state[10:12], -MAX_LIN_VEL_XY, MAX_LIN_VEL_XY)
clipped_vel_z = np.clip(state[12], -MAX_LIN_VEL_Z, MAX_LIN_VEL_Z)

if self.GUI:
self._clipAndNormalizeStateWarning(state,
clipped_pos_xy,
clipped_pos_z,
clipped_rp,
clipped_vel_xy,
clipped_vel_z
)

normalized_pos_xy = clipped_pos_xy / MAX_XY
normalized_pos_z = clipped_pos_z / MAX_Z
normalized_rp = clipped_rp / MAX_PITCH_ROLL
normalized_y = state[9] / np.pi # No reason to clip
normalized_vel_xy = clipped_vel_xy / MAX_LIN_VEL_XY
normalized_vel_z = clipped_vel_z / MAX_LIN_VEL_XY
normalized_ang_vel = state[13:16]/np.linalg.norm(state[13:16]) if np.linalg.norm(state[13:16]) != 0 else state[13:16]

norm_and_clipped = np.hstack([normalized_pos_xy,
normalized_pos_z,
state[3:7],
normalized_rp,
normalized_y,
normalized_vel_xy,
normalized_vel_z,
normalized_ang_vel,
state[16:20]
]).reshape(20,)

return norm_and_clipped

################################################################################

def _clipAndNormalizeStateWarning(self,
state,
clipped_pos_xy,
clipped_pos_z,
clipped_rp,
clipped_vel_xy,
clipped_vel_z,
):
"""Debugging printouts associated to `_clipAndNormalizeState`.
Print a warning if values in a state vector is out of the clipping range.
"""
if not(clipped_pos_xy == np.array(state[0:2])).all():
print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy position [{:.2f} {:.2f}]".format(state[0], state[1]))
if not(clipped_pos_z == np.array(state[2])).all():
print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z position [{:.2f}]".format(state[2]))
if not(clipped_rp == np.array(state[7:9])).all():
print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped roll/pitch [{:.2f} {:.2f}]".format(state[7], state[8]))
if not(clipped_vel_xy == np.array(state[10:12])).all():
print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped xy velocity [{:.2f} {:.2f}]".format(state[10], state[11]))
if not(clipped_vel_z == np.array(state[12])).all():
print("[WARNING] it", self.step_counter, "in LeaderFollowerAviary._clipAndNormalizeState(), clipped z velocity [{:.2f}]".format(state[12]))

0 comments on commit a693f43

Please sign in to comment.