GPTQ: add arXiv link, move file location (#1100)

## Purpose ##
* Better docstring for GPTQ
* Reduce unnecessary file hierarchy

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
vllm-project · Jan 29, 2025 · ba8563c · ba8563c
1 parent a76563a
commit ba8563c
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 5 deletions.
diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -16,7 +16,7 @@
 from llmcompressor.core import State
 from llmcompressor.modifiers import Modifier, ModifierFactory
 from llmcompressor.modifiers.quantization.calibration import freeze_module_quantization
-from llmcompressor.modifiers.quantization.gptq.utils.gptq_quantize import (
+from llmcompressor.modifiers.quantization.gptq.gptq_quantize import (
     accumulate_hessian,
     make_empty_hessian,
     quantize_weight,
@@ -36,7 +36,9 @@
 
 class GPTQModifier(Modifier, HooksMixin):
     """
-    Modifier for applying the one-shot OBCQ algorithm to a model
+    Implements the GPTQ algorithm from https://arxiv.org/abs/2210.17323. This modifier
+    uses activations to calibrate a hessian matrix, which is then used to determine
+    optimal quantization values and orderings for the model weights.
 
     | Sample yaml:
     | test_stage:

diff --git a/.../quantization/gptq/utils/gptq_quantize.py → ...ifiers/quantization/gptq/gptq_quantize.py b/.../quantization/gptq/utils/gptq_quantize.py → ...ifiers/quantization/gptq/gptq_quantize.py
diff --git a/src/llmcompressor/modifiers/quantization/gptq/utils/__init__.py b/src/llmcompressor/modifiers/quantization/gptq/utils/__init__.py