Commit

Avoid some log spam
Waino committed Apr 22, 2024
1 parent e68e67a commit fbe4f5c
Showing 4 changed files with 12 additions and 3 deletions.
3 changes: 3 additions & 0 deletions mammoth/bin/train.py
@@ -240,6 +240,9 @@ def train(opts):
         local_rank=local_rank,
         opts=opts
     )
+    if device_context.is_master():
+        # Enough to log this once
+        logger.info(f'TaskQueueManager: {global_task_queue_manager}')
 
     q = mp.Queue(opts.queue_size)
     semaphore = mp.Semaphore(opts.queue_size)
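Note: the train.py change gates the TaskQueueManager log line behind the master check so the message is printed once per job rather than once per worker. A minimal sketch of that pattern, assuming a typical torch.distributed-style launch where rank 0 acts as the master; the helper names and the RANK environment variable handling below are illustrative, not mammoth's actual API:

import logging
import os

logger = logging.getLogger(__name__)

def is_master_rank() -> bool:
    # Assumption: the launcher sets RANK and rank 0 is the master process.
    return int(os.environ.get('RANK', '0')) == 0

def log_once(message: str) -> None:
    # Emit the message from a single process to avoid N identical log lines.
    if is_master_rank():
        logger.info(message)
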
4 changes: 2 additions & 2 deletions mammoth/distributed/contexts.py
@@ -29,7 +29,7 @@ def is_gpu(self):
         """Data tensors must be moved to the GPU for compute"""
         return self.context != DeviceContextEnum.CPU
 
-    def global_to_local(self, node_rank, local_rank):
+    def global_to_local(self, node_rank: int, local_rank: int) -> "DeviceContext":
         assert node_rank is not None
         assert local_rank is not None
         return DeviceContext(
@@ -41,7 +41,7 @@ def global_to_local(self, node_rank, local_rank):
         )
 
     @classmethod
-    def from_opts(cls, opts):
+    def from_opts(cls, opts) -> "WorldContext":
         gpus_per_node = len(opts.gpu_ranks)
         world_size = int(opts.world_size) if gpus_per_node > 0 else 0
         multinode = gpus_per_node != world_size
2 changes: 1 addition & 1 deletion mammoth/model_builder.py
@@ -287,7 +287,7 @@ def build_task_specific_model(
     task_queue_manager,
     checkpoint,
 ):
-    logger.info(f'TaskQueueManager: {task_queue_manager}')
+    # logger.info(f'TaskQueueManager: {task_queue_manager}')
     if not model_opts.model_task == ModelTask.SEQ2SEQ:
         raise ValueError(f"Only ModelTask.SEQ2SEQ works - {model_opts.model_task} task")
 
6 changes: 6 additions & 0 deletions tools/config_config.py
@@ -321,6 +321,9 @@ def corpora_schedule(opts):
         if use_weight:
             multiplier = ae_weight if src_lang == tgt_lang else 1.0
             corpus['weight'] = weight * multiplier
+        else:
+            # Log spam if weight is unset
+            corpus['weight'] = 1
         if use_introduce_at_training_step:
             # TODO: ensure this default always matches with opts.py
             total_steps = opts.in_config[0].get('train_steps', 100_000)
@@ -333,6 +336,9 @@ def corpora_schedule(opts):
             introduce_at_training_step = round(total_steps * (1 - weight))
             corpus['introduce_at_training_step'] = introduce_at_training_step
             min_introduce_at_training_step = min(min_introduce_at_training_step, introduce_at_training_step)
+        else:
+            # Log spam if introduce_at_training_step is unset
+            corpus['introduce_at_training_step'] = 0
     if use_introduce_at_training_step and min_introduce_at_training_step > 0:
         # With a single very large task that gets split, it is possible that no task can start
         for cname, corpus in opts.in_config[0]['tasks'].items():
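Note: the config_config.py change writes explicit defaults into each task ('weight' of 1, 'introduce_at_training_step' of 0) so that downstream code reading those keys no longer warns about missing values. A minimal sketch of the defaulting idea under that assumption; the function and variable names here are illustrative, not mammoth's actual API:

def fill_task_defaults(tasks: dict) -> dict:
    # Ensure every task carries the keys the scheduler later reads,
    # so missing-key warnings are not emitted once per task.
    for corpus in tasks.values():
        corpus.setdefault('weight', 1)
        corpus.setdefault('introduce_at_training_step', 0)
    return tasks

tasks = {'en-de': {'weight': 2}, 'de-de': {}}
print(fill_task_defaults(tasks))
# {'en-de': {'weight': 2, 'introduce_at_training_step': 0},
#  'de-de': {'weight': 1, 'introduce_at_training_step': 0}}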
