
Commit

merge main
jessicazhongeee committed Jan 17, 2025
2 parents a80b7e5 + f63eaa4 commit 1b4f781
Showing 173 changed files with 673 additions and 441 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -5,7 +5,7 @@ default_language_version:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-rev: 6306a48f7dae5861702d573c9c247e4e9498e867
+rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: check-ast
@@ -18,7 +18,7 @@ repos:
exclude: '^(.*\.svg)$'

- repo: https://github.com/Lucas-C/pre-commit-hooks
-rev: v1.5.4
+rev: v1.5.5
hooks:
- id: insert-license
files: \.py$|\.sh$
@@ -27,7 +27,7 @@ repos:
- docs/license_header.txt

- repo: https://github.com/pycqa/flake8
-rev: 34cbf8ef3950f43d09b85e2e45c15ae5717dc37b
+rev: 7.1.1
hooks:
- id: flake8
additional_dependencies:
@@ -37,15 +37,15 @@ repos:
args: ['--config=.flake8']

- repo: https://github.com/omnilib/ufmt
-rev: v2.3.0
+rev: v2.8.0
hooks:
- id: ufmt
additional_dependencies:
- black == 22.12.0
- usort == 1.0.5

- repo: https://github.com/jsh9/pydoclint
-rev: 94efc5f989adbea30f3534b476b2931a02c1af90
+rev: 0.5.12
hooks:
- id: pydoclint
args: [--config=pyproject.toml]
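The pre-commit changes above only bump hook pins, moving from raw commit SHAs to tagged releases where possible. For reference, this is the usual way to exercise the hooks locally with the standard pre-commit CLI; a minimal sketch, assuming you are inside a torchtune checkout:

# One-time setup: install the framework and register the git hook
pip install pre-commit
pre-commit install
# Run every configured hook against the whole tree, picking up the new pins
pre-commit run --all-files
# Pin bumps like the ones above are typically produced with
pre-commit autoupdate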
1 change: 1 addition & 0 deletions docs/source/basics/message_transforms.rst
@@ -95,6 +95,7 @@ Example message transforms
--------------------------
- Instruct
- :class:`~torchtune.data.InputOutputToMessages`
- :class:`~torchtune.data.AlpacaToMessages`
- Chat
- :class:`~torchtune.data.ShareGPTToMessages`
- :class:`~torchtune.data.OpenAIToMessages`
2 changes: 1 addition & 1 deletion docs/source/install.rst
@@ -88,4 +88,4 @@ to the package *without* installing via ``git clone``, you can install with the
If you already have PyTorch installed, torchtune will default to using that version. However, if you want to
use the nightly version of PyTorch, you can append the ``--force-reinstall`` option to the above command. If you
opt for this install method, you will likely need to change the "cpu" suffix in the index url to match your CUDA
-version. For example, if you are running CUDA 12, your index url would be "https://download.pytorch.org/whl/nightly/cu121".
+version. For example, if you are running CUDA 12, your index url would be "https://download.pytorch.org/whl/nightly/cu126".
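Putting the updated example together, a nightly install against CUDA 12.6 might look like the sketch below. This is a hedged illustration, not the verbatim command from install.rst (which is truncated out of this diff); the exact package list and flags in the docs may differ.

# Assumed sketch: force-reinstall the PyTorch nightly built for CUDA 12.6, then torchtune
pip install --pre --force-reinstall torch --index-url https://download.pytorch.org/whl/nightly/cu126
pip install torchtune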
2 changes: 1 addition & 1 deletion docs/source/tutorials/llama3.rst
@@ -230,7 +230,7 @@ Running generation with our LoRA-finetuned model, we see the following output:
.. code-block:: bash
tune run generate --config ./custom_generation_config.yaml \
prompt="Hello, my name is"
prompt.user="Hello, my name is"
[generate.py:122] Hello, my name is Sarah and I am a busy working mum of two young children, living in the North East of England.
...
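The command-line change above swaps the flat prompt override for a structured one, suggesting the generation config now nests the prompt under separate fields. The diff only confirms prompt.user, so treat prompt.system below as an assumption. A sketch of overriding both from the CLI:

# prompt.user comes from the diff above; prompt.system is assumed to exist in the config
tune run generate --config ./custom_generation_config.yaml \
prompt.system="You are a helpful assistant" \
prompt.user="Hello, my name is"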
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -87,7 +87,7 @@ target-version = ["py38"]
[tool.pydoclint]
style = 'google'
check-return-types = 'False'
-exclude = 'tests/torchtune/models/(\w+)/scripts/'
+exclude = 'tests/torchtune/models/(\w+)/scripts/|recipes/|torchtune/modules/_export'

[tool.pytest.ini_options]
addopts = ["--showlocals", "--import-mode=prepend", "--without-integration", "--without-slow-integration"]
1 change: 1 addition & 0 deletions recipes/configs/code_llama2/7B_full_low_memory.yaml
@@ -64,6 +64,7 @@ optimizer:
optimizer_in_bwd: True # True saves memory. Requires gradient_accumulation_steps=1
loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
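This file, and every recipe config below, gains the same clip_grad_norm: null default, so gradient-norm clipping stays disabled unless you opt in. As a hedged sketch, you could enable it for a single run through the tune CLI's key=value override syntax (the same mechanism as the prompt.user override above) instead of editing the YAML; the recipe/config pairing and the max norm of 1.0 are illustrative assumptions:

# Assumed recipe/config pairing; clip_grad_norm takes a float max gradient norm
tune run full_finetune_single_device --config code_llama2/7B_full_low_memory clip_grad_norm=1.0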
1 change: 1 addition & 0 deletions recipes/configs/code_llama2/7B_lora_single_device.yaml
@@ -72,6 +72,7 @@ lr_scheduler:
num_warmup_steps: 100
loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/code_llama2/7B_qlora_single_device.yaml
@@ -71,6 +71,7 @@ lr_scheduler:
num_warmup_steps: 100
loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_full.yaml
@@ -57,6 +57,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_lora.yaml
@@ -69,6 +69,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_lora_single_device.yaml
@@ -68,6 +68,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_qlora_single_device.yaml
@@ -68,6 +68,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_full.yaml
@@ -59,6 +59,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_lora.yaml
@@ -71,6 +71,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_lora_single_device.yaml
@@ -70,6 +70,7 @@ batch_size: 8
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_qlora_single_device.yaml
@@ -70,6 +70,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/27B_full.yaml
@@ -56,6 +56,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/gemma2/27B_lora.yaml
@@ -68,6 +68,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/27B_lora_single_device.yaml
@@ -67,6 +67,7 @@ batch_size: 2
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/27B_qlora_single_device.yaml
@@ -67,6 +67,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/2B_full.yaml
@@ -58,6 +58,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/gemma2/2B_lora.yaml
@@ -70,6 +70,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/2B_lora_single_device.yaml
@@ -69,6 +69,7 @@ batch_size: 8
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/2B_qlora_single_device.yaml
@@ -69,6 +69,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/9B_full.yaml
@@ -56,6 +56,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/gemma2/9B_lora.yaml
@@ -68,6 +68,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/9B_lora_single_device.yaml
@@ -67,6 +67,7 @@ batch_size: 8
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/gemma2/9B_qlora_single_device.yaml
@@ -67,6 +67,7 @@ batch_size: 4
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training env
1 change: 1 addition & 0 deletions recipes/configs/llama2/13B_full.yaml
@@ -61,6 +61,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/llama2/13B_lora.yaml
@@ -77,6 +77,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama2/13B_qlora_single_device.yaml
@@ -72,6 +72,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama2/70B_lora.yaml
@@ -62,6 +62,7 @@ loss:
# Training
epochs: 1
max_steps_per_epoch: null
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
gradient_accumulation_steps: 1 # Use to increase effective batch size

1 change: 1 addition & 0 deletions recipes/configs/llama2/70B_qlora.yaml
@@ -72,6 +72,7 @@ fsdp:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_full.yaml
@@ -60,6 +60,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_full_low_memory.yaml
@@ -65,6 +65,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Training environment
1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora.yaml
@@ -73,6 +73,7 @@ loss:
# Training
epochs: 1
max_steps_per_epoch: null
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
gradient_accumulation_steps: 8 # Use to increase effective batch size

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora_single_device.yaml
@@ -72,6 +72,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_qat_full.yaml
@@ -56,6 +56,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_qlora.yaml
@@ -77,6 +77,7 @@ fsdp:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_qlora_single_device.yaml
@@ -71,6 +71,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama3/70B_full.yaml
@@ -69,6 +69,7 @@ enable_activation_checkpointing: True # True reduces memory
enable_activation_offloading: False # True reduces memory
custom_sharded_layers: ['tok_embeddings', 'output'] # Layers to shard separately (useful for large vocab size models). Lower Memory, but lower speed.
fsdp_cpu_offload: True
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1

1 change: 1 addition & 0 deletions recipes/configs/llama3/70B_lora.yaml
@@ -63,6 +63,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_dora.yaml
@@ -67,6 +67,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_dora_single_device.yaml
@@ -69,6 +69,7 @@ loss:
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 8 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory

# Logging
1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_full.yaml
@@ -60,6 +60,7 @@ loss:
_component_: torchtune.modules.loss.CEWithChunkedOutputLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1 # Use to increase effective batch size
clip_grad_norm: null
compile: False # torch.compile the model + loss, True increases speed + decreases memory
optimizer_in_bwd: False # True saves memory. Requires gradient_accumulation_steps=1
