ignore list (#171)

neuralmagic · Sep 30, 2024 · 7351fdb
1 parent f3d9ec2
commit 7351fdb
Showing 1 changed file with 7 additions and 0 deletions.
diff --git a/src/compressed_tensors/quantization/quant_config.py b/src/compressed_tensors/quantization/quant_config.py
@@ -201,6 +201,13 @@ def from_pretrained(
         if len(quant_scheme_to_layers) == 0:  # No quantized layers
             return None
 
+        # kv-cache only, no weight/activation quantization
+        if (
+            len(quantization_type_names) == 1
+            and "attention" in list(quantization_type_names)[0].lower()
+        ):
+            quantization_type_names.add("Linear")
+
         # clean up the ignore list; we can leave out layer types if none of
         # their instances are quantized
         consolidated_ignore = []