Merge pull request #46 from OSUrobotics/behavior_cloning_state_dim
Behavior cloning state dim
jimzers authored Aug 13, 2021
2 parents 9d54e01 + b08ce60 commit 5333116
Showing 19 changed files with 1,295 additions and 905 deletions.
41 changes: 39 additions & 2 deletions gym-kinova-gripper/DDPGfD.py
@@ -56,6 +56,8 @@ def forward(self, state, action):

class DDPGfD(object):
def __init__(self, state_dim=82, action_dim=3, max_action=3, n=5, discount=0.995, tau=0.0005, batch_size=64, expert_sampling_proportion=0.7):
print('================================ INITTING DDPGfD with state dim of: ', state_dim,
'===============================')
self.actor = Actor(state_dim, action_dim, max_action).to(device)
self.actor_target = copy.deepcopy(self.actor)
self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=1e-4)
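(A usage sketch, not part of the diff: with the constructor signature shown above, an agent built for a sliced observation would be created as, e.g., agent = DDPGfD(state_dim=72, action_dim=3, max_action=3), so the actor and critic networks are sized to the reduced state; the value 72 is illustrative only.)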
@@ -92,7 +94,7 @@ def select_action(self, state):
return self.actor(state).cpu().data.numpy().flatten()


def train(self, episode_step, expert_replay_buffer, replay_buffer=None, prob=0.7):
def train(self, episode_step, expert_replay_buffer, replay_buffer=None, prob=0.7, mod_state_idx=np.arange(82)):
""" Update policy based on full trajectory of one episode """
self.total_it += 1

@@ -129,6 +131,21 @@ def train(self, episode_step, expert_replay_buffer, replay_buffer=None, prob=0.7
print("IN OG TRAIN: lift_reward_count: ", lift_reward_count)
"""


# # TODO: STATE DIM CODE, FOR NON-BATCHING TRAINING. NEEDS TO BE TESTED BEFORE USE. COMMENTED OUT UNTIL TESTED.
# print('=============== Start printing - BATCH training =======================')
# print('=============== Before modification =======================')
# print('state dimensions: ', state.shape)
# print('next state dimensions: ', next_state.shape)
# # modify state dimensions
# state = state[:, mod_state_idx]
# next_state = next_state[:, mod_state_idx]
#
# print('=============== After modification =======================')
# print('state dimensions: ', state.shape)
# print('next state dimensions: ', next_state.shape)
# print('=============== End printing - BATCH training =======================')

# Target Q network
#print("Target Q")
target_Q = self.critic_target(next_state, self.actor_target(next_state))
@@ -235,7 +252,7 @@ def train(self, episode_step, expert_replay_buffer, replay_buffer=None, prob=0.7
return actor_loss.item(), critic_loss.item(), critic_L1loss.item(), critic_LNloss.item()
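
(For context, a minimal standalone sketch, not part of the commit, of the per-step state slicing that the commented-out TODO block above would perform; the batch size of 64 and the 72-feature subset are illustrative assumptions:

import numpy as np
import torch

state = torch.randn(64, 82)        # hypothetical batch of 82-dim states
mod_state_idx = np.arange(72)      # illustrative: keep the first 72 features
state = state[:, mod_state_idx]    # advanced indexing selects those columns
print(state.shape)                 # torch.Size([64, 72])

PyTorch accepts a NumPy integer array as an index, so the same mod_state_idx default of np.arange(82) leaves the state untouched.)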


def train_batch(self, max_episode_num, episode_num, update_count, expert_replay_buffer, replay_buffer):
def train_batch(self, max_episode_num, episode_num, update_count, expert_replay_buffer, replay_buffer, mod_state_idx=np.arange(82)):
""" Update policy networks based on batch_size of episodes using n-step returns """
self.total_it += 1
agent_batch_size = 0
@@ -283,9 +300,29 @@ def train_batch(self, max_episode_num, episode_num, update_count, expert_replay_
reward = reward.unsqueeze(0)
not_done = not_done.unsqueeze(0)

expert_state = expert_state[:, :, mod_state_idx]
# expert_next_state = expert_next_state[:, :, mod_state_idx]

reward = reward.unsqueeze(-1)
not_done = not_done.unsqueeze(-1)

# STATE DIMENSION MODIFICATION
# print('=============== Start printing - BATCH training =======================')
# print('=============== Before modification =======================')
# print('state dimensions: ', state.shape)
# print('next state dimensions: ', next_state.shape)
# print('sanity check - mod_state_idx length: ', len(mod_state_idx))

# modify state dimensions
state = state[:, :, mod_state_idx]
next_state = next_state[:, :, mod_state_idx]


# print('=============== After modification =======================')
# print('state dimensions: ', state.shape)
# print('next state dimensions: ', next_state.shape)
# print('=============== End printing - BATCH training =======================')

### FOR TESTING:
#assert_batch_size = self.batch_size * num_trajectories
num_timesteps_sampled = len(reward)
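(By contrast, train_batch operates on n-step trajectories, so the state tensors carry an extra time dimension and are sliced on the last axis. A minimal sketch under assumed shapes, not from the commit:

import numpy as np
import torch

# hypothetical (batch_size, n_steps, state_dim) trajectory batch
state = torch.randn(64, 5, 82)
next_state = torch.randn(64, 5, 82)
mod_state_idx = np.arange(72)                  # illustrative feature subset
state = state[:, :, mod_state_idx]             # -> torch.Size([64, 5, 72])
next_state = next_state[:, :, mod_state_idx]   # same slice on the next states

Note the diff above applies the slice to expert_state but leaves expert_next_state commented out.)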

Large diffs are not rendered by default.

1,472 changes: 882 additions & 590 deletions gym-kinova-gripper/gym_kinova_gripper/envs/kinova_gripper_env.py

Large diffs are not rendered by default.

172 changes: 149 additions & 23 deletions gym-kinova-gripper/main_DDPGfD.py

Large diffs are not rendered by default.

Binary file added wiki_figures/DDPGfD_diagram.PNG
Binary file added wiki_figures/all_objects.PNG
Binary file added wiki_figures/all_possible_objects.PNG
Binary file added wiki_figures/grasp_trial.PNG
Binary file added wiki_figures/hov.PNG
Binary file added wiki_figures/input_variations.PNG
Binary file added wiki_figures/orientations.PNG
Binary file added wiki_figures/policy_training.PNG
Binary file added wiki_figures/sample_update.PNG
Binary file added wiki_figures/shapes_with_titles.PNG
Binary file added wiki_figures/sizes_of_the_object.PNG
Binary file added wiki_figures/touch_vel_PID_Variable_Speed.PNG
Binary file added wiki_figures/training_pipeline.PNG
Binary file added wiki_figures/velocity_pid_Variable_Speed.PNG
1 change: 1 addition & 0 deletions wiki_figures/wiki_text.txt
@@ -0,0 +1 @@
GitHub wiki images
