Commit

Avoid some log spam
Waino committed Apr 22, 2024
1 parent e68e67a commit fbe4f5c
Showing 4 changed files with 12 additions and 3 deletions.
3 changes: 3 additions & 0 deletions mammoth/bin/train.py
@@ -240,6 +240,9 @@ def train(opts):
         local_rank=local_rank,
         opts=opts
     )
+    if device_context.is_master():
+        # Enough to log this once
+        logger.info(f'TaskQueueManager: {global_task_queue_manager}')
 
     q = mp.Queue(opts.queue_size)
     semaphore = mp.Semaphore(opts.queue_size)
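Note: the train.py change gates the TaskQueueManager log line behind the master check so the message is printed once per job rather than once per worker. A minimal sketch of that pattern, assuming a typical torch.distributed-style launch where rank 0 acts as the master; the helper names and the RANK environment variable handling below are illustrative, not mammoth's actual API:

import logging
import os

logger = logging.getLogger(__name__)

def is_master_rank() -> bool:
    # Assumption: the launcher sets RANK and rank 0 is the master process.
    return int(os.environ.get('RANK', '0')) == 0

def log_once(message: str) -> None:
    # Emit the message from a single process to avoid N identical log lines.
    if is_master_rank():
        logger.info(message)
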
4 changes: 2 additions & 2 deletions mammoth/distributed/contexts.py
@@ -29,7 +29,7 @@ def is_gpu(self):
         """Data tensors must be moved to the GPU for compute"""
         return self.context != DeviceContextEnum.CPU
 
-    def global_to_local(self, node_rank, local_rank):
+    def global_to_local(self, node_rank: int, local_rank: int) -> "DeviceContext":
         assert node_rank is not None
         assert local_rank is not None
         return DeviceContext(
@@ -41,7 +41,7 @@ def global_to_local(self, node_rank, local_rank):
         )
 
     @classmethod
-    def from_opts(cls, opts):
+    def from_opts(cls, opts) -> "WorldContext":
         gpus_per_node = len(opts.gpu_ranks)
         world_size = int(opts.world_size) if gpus_per_node > 0 else 0
         multinode = gpus_per_node != world_size
2 changes: 1 addition & 1 deletion mammoth/model_builder.py
@@ -287,7 +287,7 @@ def build_task_specific_model(
     task_queue_manager,
     checkpoint,
 ):
-    logger.info(f'TaskQueueManager: {task_queue_manager}')
+    # logger.info(f'TaskQueueManager: {task_queue_manager}')
     if not model_opts.model_task == ModelTask.SEQ2SEQ:
         raise ValueError(f"Only ModelTask.SEQ2SEQ works - {model_opts.model_task} task")
 
6 changes: 6 additions & 0 deletions tools/config_config.py
@@ -321,6 +321,9 @@ def corpora_schedule(opts):
         if use_weight:
             multiplier = ae_weight if src_lang == tgt_lang else 1.0
             corpus['weight'] = weight * multiplier
+        else:
+            # Log spam if weight is unset
+            corpus['weight'] = 1
         if use_introduce_at_training_step:
             # TODO: ensure this default always matches with opts.py
             total_steps = opts.in_config[0].get('train_steps', 100_000)
@@ -333,6 +336,9 @@ def corpora_schedule(opts):
             introduce_at_training_step = round(total_steps * (1 - weight))
             corpus['introduce_at_training_step'] = introduce_at_training_step
             min_introduce_at_training_step = min(min_introduce_at_training_step, introduce_at_training_step)
+        else:
+            # Log spam if introduce_at_training_step is unset
+            corpus['introduce_at_training_step'] = 0
     if use_introduce_at_training_step and min_introduce_at_training_step > 0:
         # With a single very large task that gets split, it is possible that no task can start
         for cname, corpus in opts.in_config[0]['tasks'].items():
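Note: the config_config.py change writes explicit defaults into each task ('weight' of 1, 'introduce_at_training_step' of 0) so that downstream code reading those keys no longer warns about missing values. A minimal sketch of the defaulting idea under that assumption; the function and variable names here are illustrative, not mammoth's actual API:

def fill_task_defaults(tasks: dict) -> dict:
    # Ensure every task carries the keys the scheduler later reads,
    # so missing-key warnings are not emitted once per task.
    for corpus in tasks.values():
        corpus.setdefault('weight', 1)
        corpus.setdefault('introduce_at_training_step', 0)
    return tasks

tasks = {'en-de': {'weight': 2}, 'de-de': {}}
print(fill_task_defaults(tasks))
# {'en-de': {'weight': 2, 'introduce_at_training_step': 0},
#  'de-de': {'weight': 1, 'introduce_at_training_step': 0}}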
