Commit ee9c6ad

changed vis field res

ferielamira1 committed Mar 22, 2024
1 parent 8ed7364
Showing 4 changed files with 99 additions and 13 deletions.
87 changes: 87 additions & 0 deletions abm/data/metaprotocol/experiments/archive/docker_exp_madrl.py
@@ -0,0 +1,87 @@
"""
Experiment file using the MetaRunner interfacing language to define a set of criteria for batch simulations
Title: Experiment 1
Goal: Understand the balance of social vs individual excitability in a fixed environment
Defined by: mezdahun and DominikDeffner @ github
"""
from abm.metarunner.metarunner import Tunable, Constant, MetaProtocol

# Defining fixed criteria for all automated simulations/experiments
fixed_criteria = [
    Constant("USE_IFDB_LOGGING", 1),
    Constant("SAVE_CSV_FILES", 1),
    Constant("WITH_VISUALIZATION", 0),  # how does the simulation speed scale with N?
    Constant("TELEPORT_TO_MIDDLE", 0),
    Constant("GHOST_WHILE_EXPLOIT", 1),
    Constant("PATCHWISE_SOCIAL_EXCLUSION", 1),
    Constant("POOLING_TIME", 0),
    Constant("MOV_EXP_VEL_MIN", 1),
    Constant("MOV_EXP_VEL_MAX", 1),
    Constant("MOV_REL_DES_VEL", 1),
    Constant("SHOW_VISUAL_FIELDS", 0),
    Constant("SHOW_VISUAL_FIELDS_RETURN", 0),
    Constant("SHOW_VISION_RANGE", 0),
    Constant("ENV_WIDTH", 600),
    Constant("ENV_HEIGHT", 600),
    Constant("VISUAL_FIELD_RESOLUTION", 1200),
    Constant("VISUAL_EXCLUSION", 1),
    Constant("VISION_RANGE", 1000),
    Constant("AGENT_FOV", 0.5),  # what is this in the VR experiment, and how can we control it?
    Constant("AGENT_CONSUMPTION", 1),
    Constant("RADIUS_AGENT", 10),
    Constant("RADIUS_RESOURCE", 40),
    Constant("MIN_RESOURCE_QUALITY", 0.1),
    Constant("MAX_RESOURCE_QUALITY", 0.1),
    Constant("MIN_RESOURCE_PER_PATCH", 100),
    Constant("MAX_RESOURCE_PER_PATCH", 101),
    Constant("MOV_EXP_TH_MIN", -0.25),
    Constant("MOV_EXP_TH_MAX", 0.25),
    Constant("MOV_REL_TH_MAX", 0.5),
    Constant("CONS_STOP_RATIO", 0.1),
    Constant("REGENERATE_PATCHES", 1),
    Constant("REGENERATE_PATCHES", 5000),  # NOTE: duplicate key; one of the two values is likely unintended
    Constant("DEC_FN", 0.5),
    Constant("DEC_FR", 0.5),
    Constant("DEC_TAU", 10),
    Constant("DEC_BW", 0),
    Constant("DEC_WMAX", 1),
    Constant("DEC_BU", 0),
    Constant("DEC_UMAX", 1),
    Constant("DEC_GW", 0.085),
    Constant("DEC_GU", 0.085),
    Constant("DEC_TW", 0.5),
    Constant("DEC_TU", 0.5),
    Constant("T", 100),
]

criteria_exp_changing = [
    Constant("N", 3),
    Tunable("DEC_EPSW", values_override=[0.25, 0.5]),
    Tunable("DEC_EPSU", values_override=[0.5, 0.6]),
    Constant("DEC_SWU", 0),
    Constant("DEC_SUW", 0),
    Constant("N_RESOURCES", 1),
]

# Creating the metaprotocol and adding the defined criteria
description_text = "Experiment N run with Docker\n" \
                   "\n" \
                   "In this experiment we would like to see\n" \
                   "...\n" \
                   "\n" \
                   "and how..."
mp = MetaProtocol(experiment_name="DockerTest2", num_batches=1,
                  parallel=False, description=description_text,
                  headless=True)
for crit in fixed_criteria:
    mp.add_criterion(crit)
for crit in criteria_exp_changing:
    mp.add_criterion(crit)

# Generating temporary env files with all criterion combinations. Comment this out if you want to
# resume a previously interrupted run instead of regenerating the files.
mp.generate_temp_env_files()

# Running the simulations
mp.run_protocols()
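
For orientation, here is a minimal sketch of what the two Tunable criteria above imply for the batch, assuming the metarunner expands Tunables into the Cartesian product of their value lists (the actual expansion logic lives in abm.metarunner.metarunner and may differ in detail):

from itertools import product

# Hypothetical illustration only: two Tunables with two values each
# yield 2 x 2 = 4 parameter combinations, i.e. 4 generated env files per batch.
epsw_values = [0.25, 0.5]  # DEC_EPSW
epsu_values = [0.5, 0.6]   # DEC_EPSU

for i, (epsw, epsu) in enumerate(product(epsw_values, epsu_values), start=1):
    print(f"run {i}: DEC_EPSW={epsw}, DEC_EPSU={epsu}")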
3 changes: 1 addition & 2 deletions abm/projects/madrl_foraging/madrl_agent/brain.py
@@ -80,7 +80,6 @@ class DQNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(DQNetwork, self).__init__()
        # convolutional layer ?
-
        self.layer1 = nn.Linear(input_size, 512)
        self.layer2 = nn.Linear(512, 256)
        self.layer3 = nn.Linear(256, 128)
@@ -139,7 +138,7 @@ def __init__(self, state_size, action_size):
            self.optimizer = optim.Adam(self.q_network.parameters(), lr=self.lr)
        else:
            print("Using RMSprop")
-            self.optimizer = optim.RMSprop(self.q_network.parameters(), lr=self.lr)
+            self.optimizer = optim.RMSprop(self.q_network.parameters(), lr=self.lr, weight_decay=1e-4)
            #self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=500, gamma=0.9)

        # Replay memory
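The added weight_decay=1e-4 applies L2 regularization inside RMSprop's update, shrinking all weights slightly at every optimizer step. Below is a standalone sketch of the selection logic in this hunk; the network stub, its input size, and the use_adam flag are assumptions for illustration, not the project's actual code:

import torch.nn as nn
import torch.optim as optim

# Stand-in network using the layer sizes visible in the diff above.
net = nn.Sequential(
    nn.Linear(16, 512), nn.ReLU(),   # input size 16 is arbitrary here
    nn.Linear(512, 256), nn.ReLU(),
    nn.Linear(256, 128),
)

use_adam = False  # hypothetical flag; the real class decides this from its config
if use_adam:
    optimizer = optim.Adam(net.parameters(), lr=1e-6)
else:
    print("Using RMSprop")
    # weight_decay=1e-4 adds an L2 penalty, nudging all weights toward zero
    optimizer = optim.RMSprop(net.parameters(), lr=1e-6, weight_decay=1e-4)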
6 changes: 3 additions & 3 deletions abm/projects/madrl_foraging/madrl_simulation/madrl_sims.py
@@ -368,7 +368,7 @@ def start_madqn(self):
                    ag.policy_network.next_state_tensor,
                    ag.policy_network.reward_tensor
                )
-                if self.train:
+                if self.train and self.t % self.train_every == 0:
                    loss = ag.policy_network.optimize()
                    # Update the target network with soft updates
                    ag.policy_network.update_target_network()
@@ -380,8 +380,8 @@
                        print(f"Loss is not a number (nan) at timestep {ag.policy_network.steps_done}!")
                    elif loss < 0:
                        print(f"Loss is negative at timestep {ag.policy_network.steps_done}!")
-                    elif loss > 20:
-                        print(f"Loss is {loss} at timestep {ag.policy_network.steps_done}!")
+                    #elif loss > 20:
+                    #    print(f"Loss is {loss} at timestep {ag.policy_network.steps_done}!")
                    writer.add_scalar(f'Agent_{ag.id}/Loss', loss, ag.policy_network.steps_done)
                elif ag.policy_network.steps_done > ag.policy_network.batch_size:
                    print(f"Loss is None at timestep {ag.policy_network.steps_done}!")
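The new guard decouples simulation steps from gradient steps: with TRAIN_EVERY=10 (set in exp_new.env below), each agent performs one optimization and one soft target-network update every tenth timestep instead of every timestep. A schematic of the cadence, with all names assumed from the hunk above rather than taken from the full source:

# Schematic only: stands in for the loop in start_madqn().
T = 20000         # simulation steps (T in exp_new.env)
train_every = 10  # TRAIN_EVERY in exp_new.env
train = True

gradient_steps = 0
for t in range(1, T + 1):
    # ... agents act and transitions are stored in replay memory every step ...
    if train and t % train_every == 0:
        # here the real code calls ag.policy_network.optimize() and
        # ag.policy_network.update_target_network()
        gradient_steps += 1

print(f"{gradient_steps} gradient steps over {T} env steps")  # 2000 over 20000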
16 changes: 8 additions & 8 deletions exp_new.env
@@ -3,21 +3,21 @@
ENV_WIDTH=500
ENV_HEIGHT=500
WINDOW_PAD=30
N=3
-N_RESOURCES=100
-N_EPISODES=50
+N_RESOURCES=3
+N_EPISODES=100
T=20000
SEED=0
TRAIN=1
-TRAIN_EVERY=1
+TRAIN_EVERY=10
PRETRAINED=0
-BATCH_SIZE=64
+BATCH_SIZE=128
REPLAY_MEMORY_CAPACITY=50000
GAMMA=0.99
LR=1e-06
EPSILON_START=1.0
EPSILON_END=0.01
EPSILON_DECAY=50000
-TAU=0.01
+TAU=0.001
OPTIMIZER=RMSprop
PRETRAINED_MODELS_DIR=
BRAIN_TYPE=DQN
@@ -35,7 +35,7 @@
USE_RAM_LOGGING=0
USE_ZARR_FORMAT=0
SAVE_CSV_FILES=0
RADIUS_AGENT=10
-VISUAL_FIELD_RESOLUTION=735
+VISUAL_FIELD_RESOLUTION=600
AGENT_FOV=1
VISION_RANGE=2000
AGENT_CONSUMPTION=1
@@ -47,7 +47,7 @@
GHOST_WHILE_EXPLOIT=1
POOLING_TIME=0
POOLING_PROBABILITY=0
RADIUS_RESOURCE=15
-MIN_RESOURCE_PER_PATCH=24
+MIN_RESOURCE_PER_PATCH=800
MAX_RESOURCE_PER_PATCH=-1
REGENERATE_PATCHES=1
PATCH_BORDER_OVERLAP=1
@@ -75,4 +75,4 @@
DEC_SUW=0
DEC_TAU=10
DEC_FN=1
DEC_FR=1
-SAVE_ROOT_DIR=/Users/ferielamira/Desktop/Uni/Master-thesis/ABM/abm/data/simulation_data/3_agents/smaller_vf/conv
+SAVE_ROOT_DIR=/Users/ferielamira/Desktop/Uni/Master-thesis/ABM/abm/data/simulation_data/3_agents/smaller_vf/slower_target_update
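
The TAU change (0.01 to 0.001) slows the soft target-network update, matching the new SAVE_ROOT_DIR suffix slower_target_update: each update now moves the target weights only 0.1% of the way toward the policy weights. A minimal sketch of the standard Polyak update this parameter usually controls, assuming the project follows the common DQN formulation (the actual update lives in the policy network's update_target_network):

import torch
import torch.nn as nn

def soft_update(policy_net: nn.Module, target_net: nn.Module, tau: float) -> None:
    # Polyak averaging: target <- tau * policy + (1 - tau) * target
    with torch.no_grad():
        for tp, pp in zip(target_net.parameters(), policy_net.parameters()):
            tp.mul_(1.0 - tau).add_(tau * pp)

policy, target = nn.Linear(4, 2), nn.Linear(4, 2)
soft_update(policy, target, tau=0.001)  # TAU=0.001: very slow tracking of the policy net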
