diff --git a/abm/agent/experiments/exp_0/Screenshot 2024-01-03 at 13.31.25.png b/abm/agent/experiments/exp_0/Screenshot 2024-01-03 at 13.31.25.png new file mode 100644 index 00000000..6fe493b1 Binary files /dev/null and b/abm/agent/experiments/exp_0/Screenshot 2024-01-03 at 13.31.25.png differ diff --git a/abm/agent/experiments/exp_0/hyperparameters.txt b/abm/agent/experiments/exp_0/hyperparameters.txt new file mode 100644 index 00000000..d3e6194b --- /dev/null +++ b/abm/agent/experiments/exp_0/hyperparameters.txt @@ -0,0 +1,25 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 2 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-4 +- reward = collected / time + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every step diff --git a/abm/agent/experiments/exp_0/model_0.pth b/abm/agent/experiments/exp_0/model_0.pth new file mode 100644 index 00000000..2f20ca92 Binary files /dev/null and b/abm/agent/experiments/exp_0/model_0.pth differ diff --git a/abm/agent/experiments/exp_0/model_1.pth b/abm/agent/experiments/exp_0/model_1.pth new file mode 100644 index 00000000..558d0eb0 Binary files /dev/null and b/abm/agent/experiments/exp_0/model_1.pth differ diff --git a/abm/agent/experiments/exp_0/tf_logs/events.out.tfevents.1704280575.Feriels-MBP.fritz.box.31020.0 b/abm/agent/experiments/exp_0/tf_logs/events.out.tfevents.1704280575.Feriels-MBP.fritz.box.31020.0 new file mode 100644 index 00000000..67c2c96d Binary files /dev/null 
and b/abm/agent/experiments/exp_0/tf_logs/events.out.tfevents.1704280575.Feriels-MBP.fritz.box.31020.0 differ diff --git a/abm/agent/experiments/exp_1/Screenshot 2024-01-08 at 12.46.22.png b/abm/agent/experiments/exp_1/Screenshot 2024-01-08 at 12.46.22.png new file mode 100644 index 00000000..aebf6f83 Binary files /dev/null and b/abm/agent/experiments/exp_1/Screenshot 2024-01-08 at 12.46.22.png differ diff --git a/abm/agent/experiments/exp_1/hyperparameters.txt b/abm/agent/experiments/exp_1/hyperparameters.txt new file mode 100644 index 00000000..001fa95a --- /dev/null +++ b/abm/agent/experiments/exp_1/hyperparameters.txt @@ -0,0 +1,25 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 2 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 +- reward = collected / time + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every step diff --git a/abm/agent/experiments/exp_1/model_0.pth b/abm/agent/experiments/exp_1/model_0.pth new file mode 100644 index 00000000..2c4171cc Binary files /dev/null and b/abm/agent/experiments/exp_1/model_0.pth differ diff --git a/abm/agent/experiments/exp_1/model_1.pth b/abm/agent/experiments/exp_1/model_1.pth new file mode 100644 index 00000000..1216d74a Binary files /dev/null and b/abm/agent/experiments/exp_1/model_1.pth differ diff --git a/abm/agent/experiments/exp_1/tf_logs/events.out.tfevents.1704285608.Feriels-MBP.fritz.box.31812.0 
b/abm/agent/experiments/exp_1/tf_logs/events.out.tfevents.1704285608.Feriels-MBP.fritz.box.31812.0 new file mode 100644 index 00000000..21fc8234 Binary files /dev/null and b/abm/agent/experiments/exp_1/tf_logs/events.out.tfevents.1704285608.Feriels-MBP.fritz.box.31812.0 differ diff --git a/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.19.png b/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.19.png new file mode 100644 index 00000000..3a6df115 Binary files /dev/null and b/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.19.png differ diff --git a/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.36.png b/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.36.png new file mode 100644 index 00000000..3258227a Binary files /dev/null and b/abm/agent/experiments/exp_10/Screenshot 2024-01-08 at 13.01.36.png differ diff --git a/abm/agent/experiments/exp_10/hyperparameters.txt b/abm/agent/experiments/exp_10/hyperparameters.txt new file mode 100644 index 00000000..2b4b0d99 --- /dev/null +++ b/abm/agent/experiments/exp_10/hyperparameters.txt @@ -0,0 +1,27 @@ +DQN-Architecture and Hyperparameters: + +DQNAgent: +- number of agents: 3 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- if self.t!=0 reward= (0.2*ag.collected_r + 0.8*collective_reward) /self.t else reward=0 +where collective_reward = sum of ag.collected_r / (self.t*len(agents)) + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update 
(tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_10/model_0.pth b/abm/agent/experiments/exp_10/model_0.pth new file mode 100644 index 00000000..6694f2c4 Binary files /dev/null and b/abm/agent/experiments/exp_10/model_0.pth differ diff --git a/abm/agent/experiments/exp_10/model_1.pth b/abm/agent/experiments/exp_10/model_1.pth new file mode 100644 index 00000000..9fcc3b70 Binary files /dev/null and b/abm/agent/experiments/exp_10/model_1.pth differ diff --git a/abm/agent/experiments/exp_10/model_2.pth b/abm/agent/experiments/exp_10/model_2.pth new file mode 100644 index 00000000..1651b830 Binary files /dev/null and b/abm/agent/experiments/exp_10/model_2.pth differ diff --git a/abm/agent/experiments/exp_10/tf_logs/events.out.tfevents.1704708244.Feriels-MacBook-Pro.local.57757.0 b/abm/agent/experiments/exp_10/tf_logs/events.out.tfevents.1704708244.Feriels-MacBook-Pro.local.57757.0 new file mode 100644 index 00000000..96c99ada Binary files /dev/null and b/abm/agent/experiments/exp_10/tf_logs/events.out.tfevents.1704708244.Feriels-MacBook-Pro.local.57757.0 differ diff --git a/abm/agent/experiments/exp_2/Screenshot 2024-01-08 at 12.47.16.png b/abm/agent/experiments/exp_2/Screenshot 2024-01-08 at 12.47.16.png new file mode 100644 index 00000000..531a50c9 Binary files /dev/null and b/abm/agent/experiments/exp_2/Screenshot 2024-01-08 at 12.47.16.png differ diff --git a/abm/agent/experiments/exp_2/hyperparameters.txt b/abm/agent/experiments/exp_2/hyperparameters.txt new file mode 100644 index 00000000..f7574fd0 --- /dev/null +++ b/abm/agent/experiments/exp_2/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 1 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- reward = 
collected / time + + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every step diff --git a/abm/agent/experiments/exp_2/model_0.pth b/abm/agent/experiments/exp_2/model_0.pth new file mode 100644 index 00000000..224b9bb0 Binary files /dev/null and b/abm/agent/experiments/exp_2/model_0.pth differ diff --git a/abm/agent/experiments/exp_2/tf_logs/events.out.tfevents.1704297112.Feriels-MBP.fritz.box.32628.0 b/abm/agent/experiments/exp_2/tf_logs/events.out.tfevents.1704297112.Feriels-MBP.fritz.box.32628.0 new file mode 100644 index 00000000..b60ba1a9 Binary files /dev/null and b/abm/agent/experiments/exp_2/tf_logs/events.out.tfevents.1704297112.Feriels-MBP.fritz.box.32628.0 differ diff --git a/abm/agent/experiments/exp_3/Screenshot 2024-01-08 at 12.49.08.png b/abm/agent/experiments/exp_3/Screenshot 2024-01-08 at 12.49.08.png new file mode 100644 index 00000000..f53e7b06 Binary files /dev/null and b/abm/agent/experiments/exp_3/Screenshot 2024-01-08 at 12.49.08.png differ diff --git a/abm/agent/experiments/exp_3/hyperparameters.txt b/abm/agent/experiments/exp_3/hyperparameters.txt new file mode 100644 index 00000000..b7f6167a --- /dev/null +++ b/abm/agent/experiments/exp_3/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 1 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- reward = collected / time + + +DQNetwork: +- input_size: [Specify the size of the input] +- 
output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_3/model_0.pth b/abm/agent/experiments/exp_3/model_0.pth new file mode 100644 index 00000000..53a54920 Binary files /dev/null and b/abm/agent/experiments/exp_3/model_0.pth differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302074.Feriels-MBP.fritz.box.33609.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302074.Feriels-MBP.fritz.box.33609.0 new file mode 100644 index 00000000..8fc8f238 Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302074.Feriels-MBP.fritz.box.33609.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302118.Feriels-MBP.fritz.box.33626.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302118.Feriels-MBP.fritz.box.33626.0 new file mode 100644 index 00000000..432f9753 Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704302118.Feriels-MBP.fritz.box.33626.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303586.Feriels-MBP.fritz.box.33871.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303586.Feriels-MBP.fritz.box.33871.0 new file mode 100644 index 00000000..c7fa0e2a Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303586.Feriels-MBP.fritz.box.33871.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303788.Feriels-MBP.fritz.box.33909.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303788.Feriels-MBP.fritz.box.33909.0 new file mode 100644 index 00000000..d2360e8d Binary files /dev/null and 
b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704303788.Feriels-MBP.fritz.box.33909.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704309517.Feriels-MBP.fritz.box.34798.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704309517.Feriels-MBP.fritz.box.34798.0 new file mode 100644 index 00000000..44940dac Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704309517.Feriels-MBP.fritz.box.34798.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704310165.Feriels-MBP.fritz.box.34920.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704310165.Feriels-MBP.fritz.box.34920.0 new file mode 100644 index 00000000..090db045 Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704310165.Feriels-MBP.fritz.box.34920.0 differ diff --git a/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704312280.Feriels-MBP.fritz.box.35263.0 b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704312280.Feriels-MBP.fritz.box.35263.0 new file mode 100644 index 00000000..24a20fef Binary files /dev/null and b/abm/agent/experiments/exp_3/tf_logs/events.out.tfevents.1704312280.Feriels-MBP.fritz.box.35263.0 differ diff --git a/abm/agent/experiments/exp_4/hyperparameters.txt b/abm/agent/experiments/exp_4/hyperparameters.txt new file mode 100644 index 00000000..9a042dfd --- /dev/null +++ b/abm/agent/experiments/exp_4/hyperparameters.txt @@ -0,0 +1,25 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 1 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- reward = 1 if resource is exploited, 0 otherwise + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: 
[Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_4/img.png b/abm/agent/experiments/exp_4/img.png new file mode 100644 index 00000000..6a3d445e Binary files /dev/null and b/abm/agent/experiments/exp_4/img.png differ diff --git a/abm/agent/experiments/exp_4/model_0.pth b/abm/agent/experiments/exp_4/model_0.pth new file mode 100644 index 00000000..6882b1d0 Binary files /dev/null and b/abm/agent/experiments/exp_4/model_0.pth differ diff --git a/abm/agent/experiments/exp_4/model_1.pth b/abm/agent/experiments/exp_4/model_1.pth new file mode 100644 index 00000000..701e15d1 Binary files /dev/null and b/abm/agent/experiments/exp_4/model_1.pth differ diff --git a/abm/agent/experiments/exp_4/tf_logs/events.out.tfevents.1704539912.Feriels-MacBook-Pro.local.46631.0 b/abm/agent/experiments/exp_4/tf_logs/events.out.tfevents.1704539912.Feriels-MacBook-Pro.local.46631.0 new file mode 100644 index 00000000..185cfa82 Binary files /dev/null and b/abm/agent/experiments/exp_4/tf_logs/events.out.tfevents.1704539912.Feriels-MacBook-Pro.local.46631.0 differ diff --git a/abm/agent/experiments/exp_5/Screenshot 2024-01-06 at 14.02.34.png b/abm/agent/experiments/exp_5/Screenshot 2024-01-06 at 14.02.34.png new file mode 100644 index 00000000..9a81b538 Binary files /dev/null and b/abm/agent/experiments/exp_5/Screenshot 2024-01-06 at 14.02.34.png differ diff --git a/abm/agent/experiments/exp_5/hyperparameters.txt b/abm/agent/experiments/exp_5/hyperparameters.txt new file mode 100644 index 00000000..4fb97f4b --- /dev/null +++ b/abm/agent/experiments/exp_5/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN Architecture and Hyperparameters: +DQNAgent: +- number of agents: 2 +- state_size: self.v_field_res+ 1, 
action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- reward = 0 if not exploit else collected / time + + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_5/model_0.pth b/abm/agent/experiments/exp_5/model_0.pth new file mode 100644 index 00000000..832c0caf Binary files /dev/null and b/abm/agent/experiments/exp_5/model_0.pth differ diff --git a/abm/agent/experiments/exp_5/model_1.pth b/abm/agent/experiments/exp_5/model_1.pth new file mode 100644 index 00000000..746b553f Binary files /dev/null and b/abm/agent/experiments/exp_5/model_1.pth differ diff --git a/abm/agent/experiments/exp_5/tf_logs/events.out.tfevents.1704543170.Feriels-MacBook-Pro.local.47226.0 b/abm/agent/experiments/exp_5/tf_logs/events.out.tfevents.1704543170.Feriels-MacBook-Pro.local.47226.0 new file mode 100644 index 00000000..009cb075 Binary files /dev/null and b/abm/agent/experiments/exp_5/tf_logs/events.out.tfevents.1704543170.Feriels-MacBook-Pro.local.47226.0 differ diff --git a/abm/agent/experiments/exp_6/Screenshot 2024-01-06 at 14.50.46.png b/abm/agent/experiments/exp_6/Screenshot 2024-01-06 at 14.50.46.png new file mode 100644 index 00000000..fc5b49d5 Binary files /dev/null and b/abm/agent/experiments/exp_6/Screenshot 2024-01-06 at 14.50.46.png differ diff --git a/abm/agent/experiments/exp_6/hyperparameters.txt b/abm/agent/experiments/exp_6/hyperparameters.txt new file mode 100644 index 00000000..35025a7a --- /dev/null +++ 
b/abm/agent/experiments/exp_6/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN-Architecture and Hyperparameters: + +DQNAgent: +- number of agents: 2 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- if self.t!=0 reward= ag.collected_r /self.t else reward=0 + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_6/model_0.pth b/abm/agent/experiments/exp_6/model_0.pth new file mode 100644 index 00000000..9640bae3 Binary files /dev/null and b/abm/agent/experiments/exp_6/model_0.pth differ diff --git a/abm/agent/experiments/exp_6/model_1.pth b/abm/agent/experiments/exp_6/model_1.pth new file mode 100644 index 00000000..5d27b068 Binary files /dev/null and b/abm/agent/experiments/exp_6/model_1.pth differ diff --git a/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546260.Feriels-MacBook-Pro.local.47833.0 b/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546260.Feriels-MacBook-Pro.local.47833.0 new file mode 100644 index 00000000..e2d8b375 Binary files /dev/null and b/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546260.Feriels-MacBook-Pro.local.47833.0 differ diff --git a/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546641.Feriels-MacBook-Pro.local.47913.0 b/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546641.Feriels-MacBook-Pro.local.47913.0 new file mode 100644 index 00000000..fb6e5fc2 Binary files /dev/null and 
b/abm/agent/experiments/exp_6/tf_logs/events.out.tfevents.1704546641.Feriels-MacBook-Pro.local.47913.0 differ diff --git a/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.52.38.png b/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.52.38.png new file mode 100644 index 00000000..834eefed Binary files /dev/null and b/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.52.38.png differ diff --git a/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.53.06.png b/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.53.06.png new file mode 100644 index 00000000..39840cf4 Binary files /dev/null and b/abm/agent/experiments/exp_7/Screenshot 2024-01-08 at 12.53.06.png differ diff --git a/abm/agent/experiments/exp_7/hyperparameters.txt b/abm/agent/experiments/exp_7/hyperparameters.txt new file mode 100644 index 00000000..cbbb2e26 --- /dev/null +++ b/abm/agent/experiments/exp_7/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN-Architecture and Hyperparameters: + +DQNAgent: +- number of agents: 3 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- if self.t!=0 reward= ag.collected_r /self.t else reward=0 + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_7/model_0.pth b/abm/agent/experiments/exp_7/model_0.pth new file mode 100644 index 00000000..dd98595b Binary files /dev/null and b/abm/agent/experiments/exp_7/model_0.pth differ diff --git 
a/abm/agent/experiments/exp_7/model_1.pth b/abm/agent/experiments/exp_7/model_1.pth new file mode 100644 index 00000000..41a011b8 Binary files /dev/null and b/abm/agent/experiments/exp_7/model_1.pth differ diff --git a/abm/agent/experiments/exp_7/model_2.pth b/abm/agent/experiments/exp_7/model_2.pth new file mode 100644 index 00000000..579b0c19 Binary files /dev/null and b/abm/agent/experiments/exp_7/model_2.pth differ diff --git a/abm/agent/experiments/exp_7/tf_logs/events.out.tfevents.1704559485.Feriels-MacBook-Pro.local.49934.0 b/abm/agent/experiments/exp_7/tf_logs/events.out.tfevents.1704559485.Feriels-MacBook-Pro.local.49934.0 new file mode 100644 index 00000000..fb106b77 Binary files /dev/null and b/abm/agent/experiments/exp_7/tf_logs/events.out.tfevents.1704559485.Feriels-MacBook-Pro.local.49934.0 differ diff --git a/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.54.50.png b/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.54.50.png new file mode 100644 index 00000000..bfac42fc Binary files /dev/null and b/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.54.50.png differ diff --git a/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.55.28.png b/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.55.28.png new file mode 100644 index 00000000..77a2d440 Binary files /dev/null and b/abm/agent/experiments/exp_8/Screenshot 2024-01-08 at 12.55.28.png differ diff --git a/abm/agent/experiments/exp_8/hyperparameters.txt b/abm/agent/experiments/exp_8/hyperparameters.txt new file mode 100644 index 00000000..c65eb1ef --- /dev/null +++ b/abm/agent/experiments/exp_8/hyperparameters.txt @@ -0,0 +1,26 @@ +DQN-Architecture and Hyperparameters: + +DQNAgent: +- number of agents: 3 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 
with scheduler +- if self.t!=0 reward= ag.collected_r + ag.collective_reward /self.t else reward=0 + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_8/model_0.pth b/abm/agent/experiments/exp_8/model_0.pth new file mode 100644 index 00000000..c05390de Binary files /dev/null and b/abm/agent/experiments/exp_8/model_0.pth differ diff --git a/abm/agent/experiments/exp_8/model_1.pth b/abm/agent/experiments/exp_8/model_1.pth new file mode 100644 index 00000000..902949de Binary files /dev/null and b/abm/agent/experiments/exp_8/model_1.pth differ diff --git a/abm/agent/experiments/exp_8/model_2.pth b/abm/agent/experiments/exp_8/model_2.pth new file mode 100644 index 00000000..b37741f1 Binary files /dev/null and b/abm/agent/experiments/exp_8/model_2.pth differ diff --git a/abm/agent/experiments/exp_8/tf_logs/events.out.tfevents.1704625868.Feriels-MacBook-Pro.local.53954.0 b/abm/agent/experiments/exp_8/tf_logs/events.out.tfevents.1704625868.Feriels-MacBook-Pro.local.53954.0 new file mode 100644 index 00000000..d60bc55f Binary files /dev/null and b/abm/agent/experiments/exp_8/tf_logs/events.out.tfevents.1704625868.Feriels-MacBook-Pro.local.53954.0 differ diff --git a/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.56.41.png b/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.56.41.png new file mode 100644 index 00000000..77181c54 Binary files /dev/null and b/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.56.41.png differ diff --git a/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.57.07.png b/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.57.07.png new file mode 
100644 index 00000000..93dee03c Binary files /dev/null and b/abm/agent/experiments/exp_9/Screenshot 2024-01-08 at 12.57.07.png differ diff --git a/abm/agent/experiments/exp_9/hyperparameters.txt b/abm/agent/experiments/exp_9/hyperparameters.txt new file mode 100644 index 00000000..2b4b0d99 --- /dev/null +++ b/abm/agent/experiments/exp_9/hyperparameters.txt @@ -0,0 +1,27 @@ +DQN-Architecture and Hyperparameters: + +DQNAgent: +- number of agents: 3 +- state_size: self.v_field_res+ 1, action_size=3 +- action_size: 3 [1: explore, 2: exploit, 3: relocate] +- replay_memory_capacity: 10,000 +- batch_size: 128 +- gamma: 0.99 +- epsilon_start: 0.9 +- tau: 0.005 +- epsilon_decay: 1000 +- epsilon_end: 0.05 +- lr: 1e-5 with scheduler +- if self.t!=0 reward= (0.2*ag.collected_r + 0.8*collective_reward) /self.t else reward=0 +where collective_reward = sum of ag.collected_r / (self.t*len(agents)) + +DQNetwork: +- input_size: [Specify the size of the input] +- output_size: [Specify the size of the output layer] + +Training Process: +- Experience replay with a deque (max capacity: 10,000) +- Epsilon-greedy exploration +- Q-network trained with mini-batches (batch size: 128) +- Mean Squared Error (MSE) loss +- Target Q-network updated with soft update (tau: 0.005) every 50 iterations diff --git a/abm/agent/experiments/exp_9/model_0.pth b/abm/agent/experiments/exp_9/model_0.pth new file mode 100644 index 00000000..e7b83567 Binary files /dev/null and b/abm/agent/experiments/exp_9/model_0.pth differ diff --git a/abm/agent/experiments/exp_9/model_1.pth b/abm/agent/experiments/exp_9/model_1.pth new file mode 100644 index 00000000..d21bfbff Binary files /dev/null and b/abm/agent/experiments/exp_9/model_1.pth differ diff --git a/abm/agent/experiments/exp_9/model_2.pth b/abm/agent/experiments/exp_9/model_2.pth new file mode 100644 index 00000000..59ace4e7 Binary files /dev/null and b/abm/agent/experiments/exp_9/model_2.pth differ diff --git 
a/abm/agent/experiments/exp_9/tf_logs/events.out.tfevents.1704644122.Feriels-MacBook-Pro.local.56489.0 b/abm/agent/experiments/exp_9/tf_logs/events.out.tfevents.1704644122.Feriels-MacBook-Pro.local.56489.0 new file mode 100644 index 00000000..753ddbcb Binary files /dev/null and b/abm/agent/experiments/exp_9/tf_logs/events.out.tfevents.1704644122.Feriels-MacBook-Pro.local.56489.0 differ