Skip to content

Commit

Permalink
use deterministic
Browse files Browse the repository at this point in the history
  • Loading branch information
penfever committed Feb 14, 2025
1 parent efc0a04 commit e2e0c7c
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
5 changes: 4 additions & 1 deletion configs/recipes/smollm/sft/135m/quickstart_train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ training:
compile: False

dataloader_num_workers: "auto"
dataloader_prefetch_factor: 32
dataloader_prefetch_factor:

seed: 192847
use_deterministic: True

logging_steps: 5
log_model_summary: False
Expand Down
1 change: 1 addition & 0 deletions docs/user_guides/train/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ training:
output_dir: "output" # Directory for saving outputs
run_name: null # Unique identifier for the run
seed: 42 # Random seed for reproducibility
use_deterministic: false # Use only deterministic cuDNN algorithms
# Training duration
num_train_epochs: 3 # Number of training epochs
Expand Down
4 changes: 3 additions & 1 deletion src/oumi/cli/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ def train(

limit_per_process_memory()
device_cleanup()
set_random_seeds(parsed_config.training.seed)
set_random_seeds(
parsed_config.training.seed, parsed_config.training.use_deterministic
)

# Run training
oumi_train(parsed_config)
Expand Down
6 changes: 6 additions & 0 deletions src/oumi/core/configs/params/training_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ class TrainingParams(BaseParams):
weight initialization, and any stochastic operations.
"""

use_deterministic: bool = False
"""Whether to use deterministic algorithms for reproducibility.
If set to True, only cuDNN algorithms that are believed to be
deterministic are allowed.
"""

run_name: Optional[str] = None
"""A unique identifier for the current training run.
Expand Down

0 comments on commit e2e0c7c

Please sign in to comment.