
Commit

SparseAutoModel compatibility
Sara Adkins committed Aug 7, 2024
1 parent 278fc20 commit f1e269f
Showing 2 changed files with 9 additions and 5 deletions.
src/llmcompressor/transformers/compression/helpers.py (2 additions, 1 deletion)
@@ -1,8 +1,9 @@
 from typing import Dict, List, Optional, Union
-from accelerate.accelerator import get_state_dict_offloaded_model
+
 import psutil
 import torch
 from accelerate import infer_auto_device_map, init_empty_weights
+from accelerate.accelerator import get_state_dict_offloaded_model
 from torch.nn.modules import Linear
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM
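For reference, a minimal sketch (not part of the commit) of the helper whose import is relocated above: as its name suggests, get_state_dict_offloaded_model gathers a model's parameters, including any that accelerate has offloaded, into a single state dict. The toy model below is a placeholder.

import torch
from accelerate.accelerator import get_state_dict_offloaded_model

# Placeholder model; in llm-compressor this would be a (possibly offloaded) causal LM.
model = torch.nn.Linear(4, 4)

# Collect a full state dict, materializing offloaded parameters alongside on-device ones.
state_dict = get_state_dict_offloaded_model(model)
print(list(state_dict.keys()))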
src/llmcompressor/transformers/sparsification/sparse_model.py (7 additions, 4 deletions)
@@ -123,14 +123,17 @@ def skip(*args, **kwargs):
         if compressor is not None:
             quantization_config = compressor.quantization_config
             is_compressed = (
-                quantization_config is not None and
-                quantization_config.quantization_status == QuantizationStatus.COMPRESSED
+                quantization_config is not None
+                and quantization_config.quantization_status
+                == QuantizationStatus.COMPRESSED
             )
             if run_compressed and is_compressed:
                 # initialize quantization, don't decompress
-                apply_quantization_config(model, quantization_config)
+                apply_quantization_config(
+                    model, quantization_config, run_compressed=True
+                )
                 model = load_checkpoint_and_dispatch(
-                    model, pretrained_model_name_or_path, *model_args, **kwargs
+                    model, pretrained_model_name_or_path
                 )
             else:
                 # initialize quantization and decompress weights
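To illustrate the branch changed above, a hedged usage sketch (not part of the commit): loading a checkpoint whose quantization status is COMPRESSED while keeping the weights compressed in memory. It assumes run_compressed is accepted as a keyword by SparseAutoModelForCausalLM.from_pretrained, as the run_compressed and is_compressed check implies, and that the class is exported from llmcompressor.transformers; the checkpoint path is a placeholder.

from llmcompressor.transformers import SparseAutoModelForCausalLM

model = SparseAutoModelForCausalLM.from_pretrained(
    "./my-compressed-checkpoint",  # placeholder: a model saved in compressed form
    run_compressed=True,           # initialize quantization config, skip decompression
)

When run_compressed is False or the checkpoint is not compressed, the else branch above decompresses the weights during loading instead.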
