revert back contaminated code (#71)
horheynm authored Aug 8, 2024
1 parent a060f5e commit da5bd54
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/llmcompressor/transformers/compression/helpers.py
@@ -138,7 +138,13 @@ def quantization_memory_requirement(model: torch.nn.Module) -> int:
             for param in module.parameters():
                 # assume the max of group 128 and static scale/zp
                 # TODO: base this on the recipe instead of assuming max
-                max_quant_shape = param.shape[0] * param.shape[1] // 128
+
+                # potentially just bias term
+                max_quant_shape = param.shape[0] // 128
+
+                if len(param.size()) > 1:  # weights
+                    max_quant_shape *= param.shape[1]
+
                 total_elements += max_quant_shape * 4
 
     bytes_ratio = 32 // 16  # assuming float16
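
For readers skimming the hunk: the deleted line indexed param.shape[1] unconditionally, which raises an IndexError for 1-D bias parameters; the restored code guards the column multiply behind a dimensionality check. Below is a minimal, self-contained sketch of the restored heuristic. The function name estimate_quant_param_elements and the demo layer sizes are illustrative assumptions, not part of the commit; only the per-parameter arithmetic mirrors the added lines.

import torch
from torch.nn import Linear


def estimate_quant_param_elements(model: torch.nn.Module) -> int:
    # Hypothetical restatement of the restored hunk: count the elements
    # needed for quantization scale/zero-point data, assuming the max of
    # group size 128 and static scale/zp, as the original comment notes.
    total_elements = 0
    for module in model.modules():
        if isinstance(module, Linear):
            for param in module.parameters():
                # one group per 128 output rows; a 1-D param is
                # potentially just a bias term
                max_quant_shape = param.shape[0] // 128
                if len(param.size()) > 1:  # weights: groups span columns too
                    max_quant_shape *= param.shape[1]
                # factor of 4 copied from the commit
                total_elements += max_quant_shape * 4
    return total_elements


# Usage sketch: the bias (shape [4096]) no longer trips the shape[1]
# access that the deleted line would have attempted.
layer = Linear(4096, 4096, bias=True)
print(estimate_quant_param_elements(layer))  # weight groups + bias groups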
