Skip to content


Reinstated old, untested learn and replay scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
JacopoPan committed Oct 16, 2023
1 parent dd95218 commit 5da6022
Show file tree
Hide file tree
Showing 5 changed files with 411 additions and 297 deletions.
2 changes: 1 addition & 1 deletion gym_pybullet_drones/envs/
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self,
obs: ObservationType=ObservationType.KIN,
act: ActionType=ActionType.RPM
act: ActionType=ActionType.PID
"""Initialization of a single agent RL environment.
Expand Down
19 changes: 9 additions & 10 deletions gym_pybullet_drones/envs/
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(self,
obs: ObservationType=ObservationType.KIN,
act: ActionType=ActionType.RPM):
act: ActionType=ActionType.PID):
"""Initialization of a multi-agent RL environment.
Using the generic multi-agent RL superclass.
Expand Down Expand Up @@ -70,32 +70,31 @@ def __init__(self,

def _computeReward(self):
"""Computes the current reward value(s).
"""Computes the current reward value.
dict[int, float]
The reward value for each drone.
The reward.
rewards = np.zeros(self.NUM_DRONES)
states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
rewards[0] = -1 * np.linalg.norm(np.array([0, 0, 0.5]) - states[0, 0:3])**2
# rewards[1] = -1 * np.linalg.norm(np.array([states[1, 0], states[1, 1], 0.5]) - states[1, 0:3])**2 # DEBUG WITH INDEPENDENT REWARD
for i in range(1, self.NUM_DRONES):
rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2)
# for i in range(1, self.NUM_DRONES):
# rewards[i] = (-(1/self.NUM_DRONES) * np.linalg.norm(np.array([states[i, 0], states[i, 1], states[0, 2]]) - states[i, 0:3])**2)
return rewards[0] #TODO: return multiple rewards


def _computeTerminated(self):
"""Computes the current done value(s).
"""Computes the current done value.
dict[int | "__all__", bool]
Dictionary with the done value of each drone and
one additional boolean value for key "__all__".
Whether the current episode is done.
bool_val = True if self.step_counter/self.PYB_FREQ > self.EPISODE_LEN_SEC else False
Expand Down
288 changes: 2 additions & 286 deletions gym_pybullet_drones/examples/
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB, record_video=DEFAULT_RECORD_VIDEO):

#### Check the environment's spaces ########################
# env = gym.make('hover-aviary-v0')
env = gym.make('leaderfollower-aviary-v0')
env = gym.make('hover-aviary-v0')
# env = gym.make('leaderfollower-aviary-v0')
print('[INFO] Action space:', env.action_space)
print('[INFO] Observation space:', env.observation_space)

Expand Down Expand Up @@ -93,287 +93,3 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, colab=D
ARGS = parser.parse_args()


# """Learning script for single agent problems.

# Agents are based on `stable_baselines3`'s implementation of A2C, PPO, SAC, TD3, DDPG.

# Example
# -------
# To run the script, type in a terminal:

# $ python --env <env> --algo <alg> --obs <ObservationType> --act <ActionType> --cpu <cpu_num>

# Notes
# -----
# Use:

# $ tensorboard --logdir ./results/save-<env>-<algo>-<obs>-<act>-<time-date>/tb/

# To check the tensorboard results at:

# http://localhost:6006/

# """
# import os
# import time
# from datetime import datetime
# from sys import platform
# import argparse
# import subprocess
# import numpy as np
# import gymnasium as gym
# import torch
# from stable_baselines3.common.env_checker import check_env
# from stable_baselines3.common.cmd_util import make_vec_env # Module cmd_util will be renamed to env_util
# from stable_baselines3.common.vec_env import SubprocVecEnv, VecTransposeImage
# from stable_baselines3.common.utils import set_random_seed
# from stable_baselines3 import A2C
# from stable_baselines3 import PPO
# from stable_baselines3 import SAC
# from stable_baselines3 import TD3
# from stable_baselines3 import DDPG
# from stable_baselines3.common.policies import ActorCriticPolicy as a2cppoMlpPolicy
# from stable_baselines3.common.policies import ActorCriticCnnPolicy as a2cppoCnnPolicy
# from stable_baselines3.sac.policies import SACPolicy as sacMlpPolicy
# from stable_baselines3.sac import CnnPolicy as sacCnnPolicy
# from stable_baselines3.td3 import MlpPolicy as td3ddpgMlpPolicy
# from stable_baselines3.td3 import CnnPolicy as td3ddpgCnnPolicy
# from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback, StopTrainingOnRewardThreshold

# from gym_pybullet_drones.envs.single_agent_rl.TakeoffAviary import TakeoffAviary
# from gym_pybullet_drones.envs.single_agent_rl.HoverAviary import HoverAviary
# from gym_pybullet_drones.envs.single_agent_rl.FlyThruGateAviary import FlyThruGateAviary
# from gym_pybullet_drones.envs.single_agent_rl.TuneAviary import TuneAviary
# from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType

# import shared_constants

# EPISODE_REWARD_THRESHOLD = -0 # Upper bound: rewards are always negative, but non-zero
# """float: Reward threshold to halt the script."""

# DEFAULT_ENV = 'hover'
# DEFAULT_ALGO = 'ppo'
# DEFAULT_OBS = ObservationType('kin')
# DEFAULT_ACT = ActionType('one_d_rpm')

# def run(
# ):

# #### Save directory ########################################
# filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'"%m.%d.%Y_%H.%M.%S"))
# if not os.path.exists(filename):
# os.makedirs(filename+'/')

# #### Print out current git commit hash #####################
# if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()):
# git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip()
# with open(filename+'/git_commit.txt', 'w+') as f:
# f.write(str(git_commit))

# #### Warning ###############################################
# if env == 'tune' and act != ActionType.TUN:
# print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n")
# if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID:
# print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n")
# #### Errors ################################################
# if not env in ['takeoff', 'hover']:
# print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary")
# exit()
# if act == ActionType.TUN and env != 'tune' :
# print("[ERROR] ActionType.TUN is only compatible with TuneAviary")
# exit()
# if algo in ['sac', 'td3', 'ddpg'] and cpu!=1:
# print("[ERROR] The selected algorithm does not support multiple environments")
# exit()

# #### Uncomment to debug slurm scripts ######################
# # exit()

# env_name = env+"-aviary-v0"
# sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act)
# # train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one
# if env_name == "takeoff-aviary-v0":
# train_env = make_vec_env(TakeoffAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=cpu,
# seed=0
# )
# if env_name == "hover-aviary-v0":
# train_env = make_vec_env(HoverAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=cpu,
# seed=0
# )
# if env_name == "flythrugate-aviary-v0":
# train_env = make_vec_env(FlyThruGateAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=cpu,
# seed=0
# )
# if env_name == "tune-aviary-v0":
# train_env = make_vec_env(TuneAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=cpu,
# seed=0
# )
# print("[INFO] Action space:", train_env.action_space)
# print("[INFO] Observation space:", train_env.observation_space)
# # check_env(train_env, warn=True, skip_render_check=True)

# #### On-policy algorithms ##################################
# onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
# net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])]
# ) # or None
# if algo == 'a2c':
# model = A2C(a2cppoMlpPolicy,
# train_env,
# policy_kwargs=onpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# ) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy,
# train_env,
# policy_kwargs=onpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# )
# if algo == 'ppo':
# model = PPO(a2cppoMlpPolicy,
# train_env,
# policy_kwargs=onpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# ) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy,
# train_env,
# policy_kwargs=onpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# )

# #### Off-policy algorithms #################################
# offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
# net_arch=[512, 512, 256, 128]
# ) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32]))
# if algo == 'sac':
# model = SAC(sacMlpPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# ) if obs==ObservationType.KIN else SAC(sacCnnPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# )
# if algo == 'td3':
# model = TD3(td3ddpgMlpPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# ) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# )
# if algo == 'ddpg':
# model = DDPG(td3ddpgMlpPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# ) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
# train_env,
# policy_kwargs=offpolicy_kwargs,
# tensorboard_log=filename+'/tb/',
# verbose=1
# )

# #### Create evaluation environment #########################
# if obs == ObservationType.KIN:
# eval_env = gym.make(env_name,
# aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
# obs=obs,
# act=act
# )
# elif obs == ObservationType.RGB:
# if env_name == "takeoff-aviary-v0":
# eval_env = make_vec_env(TakeoffAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=1,
# seed=0
# )
# if env_name == "hover-aviary-v0":
# eval_env = make_vec_env(HoverAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=1,
# seed=0
# )
# if env_name == "flythrugate-aviary-v0":
# eval_env = make_vec_env(FlyThruGateAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=1,
# seed=0
# )
# if env_name == "tune-aviary-v0":
# eval_env = make_vec_env(TuneAviary,
# env_kwargs=sa_env_kwargs,
# n_envs=1,
# seed=0
# )
# eval_env = VecTransposeImage(eval_env)

# #### Train the model #######################################
# # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=filename+'-logs/', name_prefix='rl_model')
# callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=EPISODE_REWARD_THRESHOLD,
# verbose=1
# )
# eval_callback = EvalCallback(eval_env,
# callback_on_new_best=callback_on_best,
# verbose=1,
# best_model_save_path=filename+'/',
# log_path=filename+'/',
# eval_freq=int(2000/cpu),
# deterministic=True,
# render=False
# )
# model.learn(total_timesteps=steps, #int(1e12),
# callback=eval_callback,
# log_interval=100,
# )

# #### Save the model ########################################
# print(filename)

# #### Print training progression ############################
# with np.load(filename+'/evaluations.npz') as data:
# for j in range(data['timesteps'].shape[0]):
# print(str(data['timesteps'][j])+","+str(data['results'][j][0]))

# if __name__ == "__main__":
# #### Define and parse (optional) arguments for the script ##
# parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
# parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
# parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
# parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='')
# parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
# parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='')
# parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='')
# parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
# ARGS = parser.parse_args()

# run(**vars(ARGS))

0 comments on commit 5da6022

Please sign in to comment.