Skip to content

Commit

Permalink
Fix 2/4 GPTQ Model Tests (vllm-project#769)
Browse files Browse the repository at this point in the history
* fix 2 of the 4 failing GPTQ model tests

* add explanatory comments to the code
  • Loading branch information
dsikka authored Oct 2, 2024
1 parent f12b3c7 commit 8b14532
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions tests/llmcompressor/transformers/gptq/test_oneshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,23 @@ def test_oneshot_application(self):
model_loaded = SparseAutoModelForCausalLM.from_pretrained(self.output)

# Check that the model is quantized
assert model_loaded.quantization_config is not None
# With compression_config, decompress() is called right away, which attaches
# a quantization_config directly to the model.
# With quantization_config, the model uses CompressedLinear, which only
# decompresses on the forward pass and never calls decompress(). As a result,
# the quant config is reached through a slightly different attribute path.
quantization_config = (
model_loaded.config.quantization_config.quantization_config
)
assert quantization_config is not None

# check config is set properly
assert model_loaded.quantization_config.ignore == ["lm_head"]
assert len(model_loaded.quantization_config.config_groups) == 1
quant_scheme = model_loaded.quantization_config.config_groups["group_0"]
assert quantization_config.ignore == ["lm_head"]
assert len(quantization_config.config_groups) == 1
quant_scheme = quantization_config.config_groups["group_0"]
assert isinstance(quant_scheme, QuantizationScheme)
assert quant_scheme.targets == ["Linear"]
weight_args = model_loaded.quantization_config.config_groups["group_0"].weights
weight_args = quantization_config.config_groups["group_0"].weights
assert isinstance(weight_args, QuantizationArgs)
assert weight_args.num_bits == 4

Expand Down

0 comments on commit 8b14532

Please sign in to comment.