From 7351fdb5832471509a68d8230b843780816f1084 Mon Sep 17 00:00:00 2001
From: George
Date: Mon, 30 Sep 2024 09:50:21 -0400
Subject: [PATCH] ignore list (#171)

---
 src/compressed_tensors/quantization/quant_config.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/compressed_tensors/quantization/quant_config.py b/src/compressed_tensors/quantization/quant_config.py
index 30785554..04c8deb7 100644
--- a/src/compressed_tensors/quantization/quant_config.py
+++ b/src/compressed_tensors/quantization/quant_config.py
@@ -201,6 +201,13 @@ def from_pretrained(
         if len(quant_scheme_to_layers) == 0:  # No quantized layers
             return None
 
+        # kv-cache only, no weight/activation quantization
+        if (
+            len(quantization_type_names) == 1
+            and "attention" in list(quantization_type_names)[0].lower()
+        ):
+            quantization_type_names.add("Linear")
+
         # clean up ignore list, we can leave out layers types if none of the
         # instances are quantized
         consolidated_ignore = []
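
For context, a minimal hypothetical sketch of what the added branch does, not the library's actual from_pretrained implementation. The layer_type helper and the layer names below are made up for illustration; only the kv-cache-only condition is copied from the diff. The point is that the ignore-list consolidation that follows in the real code ("we can leave out layer types if none of the instances are quantized") would otherwise drop every ignored Linear entry when only attention (kv cache) modules are quantized:

    # Hypothetical sketch: only attention modules are quantized (kv cache),
    # and the user ignored some Linear layers. Names are illustrative only.
    quantization_type_names = {"LlamaAttention"}
    ignore = ["lm_head", "model.layers.0.mlp.down_proj"]

    # The check added by this patch: treat Linear as a quantized type so that
    # Linear entries in the ignore list survive consolidation.
    if (
        len(quantization_type_names) == 1
        and "attention" in list(quantization_type_names)[0].lower()
    ):
        quantization_type_names.add("Linear")

    # Simplified stand-in for the consolidation step (not the real logic):
    def layer_type(name: str) -> str:
        # hypothetical: assume every ignored entry above is a Linear layer
        return "Linear"

    consolidated_ignore = [
        name for name in ignore if layer_type(name) in quantization_type_names
    ]
    # Without the added check, consolidated_ignore would be [], and the ignored
    # Linear layers would be lost from the serialized config.
    print(consolidated_ignore)  # ['lm_head', 'model.layers.0.mlp.down_proj']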