Skip to content

Commit

Permalink
fallback
Browse files Browse the repository at this point in the history
  • Loading branch information
kylesayrs committed Dec 6, 2024
1 parent f9ab6fc commit 0dc74dd
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 7 deletions.
1 change: 0 additions & 1 deletion examples/multimodal_vision/mllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def data_collator(batch):
targets="Linear",
scheme="W8A8",
ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
update_size=NUM_CALIBRATION_SAMPLES,
),
]

Expand Down
1 change: 0 additions & 1 deletion examples/multimodal_vision/pixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def data_collator(batch):
targets="Linear",
scheme="W8A8",
ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_model.*"],
update_size=NUM_CALIBRATION_SAMPLES,
),
]

Expand Down
1 change: 0 additions & 1 deletion examples/multimodal_vision/qwen_vl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def data_collator(batch):
),
},
ignore=["re:.*lm_head"],
update_size=NUM_CALIBRATION_SAMPLES,
dampening_frac=0.5,
)

Expand Down
14 changes: 10 additions & 4 deletions src/llmcompressor/modifiers/quantization/gptq/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
)
from llmcompressor.modifiers.quantization.quantization.base import QuantizationModifier
from llmcompressor.modifiers.utils.hooks import HooksMixin
from llmcompressor.pipelines.piecewise import run_pipeline
from llmcompressor.pipelines.basic import run_pipeline as run_basic
from llmcompressor.pipelines.piecewise import run_pipeline as run_piecewise
from llmcompressor.utils.metric_logging import CompressionLogger
from llmcompressor.utils.pytorch.module import (
get_layers,
Expand Down Expand Up @@ -217,9 +218,14 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
if self._update_size is None:
self._update_size = len(state.data.calib)

run_pipeline(
state.model, self.sequential_targets, state.data.calib, propagate_error=True
)
# run_pipeline(
# state.model, self.sequential_targets, state.data.calib, propagate_error=True
# )

self.offload_hessians = True
run_basic(state.model, state.data.calib)



return True

Expand Down
Empty file.
1 change: 1 addition & 0 deletions src/llmcompressor/pipelines/basic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .pipeline import run_pipeline
36 changes: 36 additions & 0 deletions src/llmcompressor/pipelines/basic/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from contextlib import nullcontext
from typing import List

import torch
import torch.utils.data.dataloader
import tqdm

from llmcompressor.core import callbacks as session_callbacks
from llmcompressor.modifiers.modifier import Modifier
from llmcompressor.modifiers.utils.hooks import HooksMixin
from llmcompressor.modifiers.utils.pytorch_helpers import apply_pad_mask_to_batch
from llmcompressor.pipelines.piecewise.helpers import (
infer_sequential_targets,
trace_subgraphs,
)
from llmcompressor.pytorch.utils.helpers import tensors_to_device
from llmcompressor.utils.helpers import calibration_forward_context

__all__ = ["run_pipeline"]

def run_pipeline(
    model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
) -> None:
    """
    Run a basic (non-sequential) calibration pipeline: forward each batch
    through ``model`` so any registered calibration hooks can observe the
    activations. Unlike the piecewise pipeline, no subgraph tracing or
    error propagation is performed.

    :param model: model to calibrate; may be dispatched across devices
        via an ``hf_device_map`` attribute
    :param dataloader: dataloader yielding calibration batches
    """
    # TODO: revisit device inference for dispatched models; this assumes
    # the first entry of hf_device_map hosts the model's input layer
    device_map = getattr(model, "hf_device_map", None)
    if device_map is not None:
        model_device = next(iter(device_map.values()))
    else:
        model_device = model.device

    # Calibration only needs to observe activations via hooks; disable
    # autograd so no graph is built and activation memory is freed eagerly.
    with torch.no_grad():
        for batch in tqdm.tqdm(dataloader, desc="Calibrating"):
            batch = apply_pad_mask_to_batch(batch)
            batch = tensors_to_device(batch, model_device)
            model(**batch)

0 comments on commit 0dc74dd

Please sign in to comment.