From 7351fdb5832471509a68d8230b843780816f1084 Mon Sep 17 00:00:00 2001
From: George
Date: Mon, 30 Sep 2024 09:50:21 -0400
Subject: [PATCH] ignore list (#171)

---
 src/compressed_tensors/quantization/quant_config.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/compressed_tensors/quantization/quant_config.py b/src/compressed_tensors/quantization/quant_config.py
index 30785554..04c8deb7 100644
--- a/src/compressed_tensors/quantization/quant_config.py
+++ b/src/compressed_tensors/quantization/quant_config.py
@@ -201,6 +201,13 @@ def from_pretrained(
         if len(quant_scheme_to_layers) == 0:  # No quantized layers
             return None
 
+        # kv-cache only, no weight/activation quantization
+        if (
+            len(quantization_type_names) == 1
+            and "attention" in list(quantization_type_names)[0].lower()
+        ):
+            quantization_type_names.add("Linear")
+
         # clean up ignore list, we can leave out layers types if none of the
         # instances are quantized
         consolidated_ignore = []
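
For context, a minimal hypothetical sketch of what the added branch does, not the library's actual from_pretrained implementation. The layer_type helper and the layer names below are made up for illustration; only the kv-cache-only condition is copied from the diff. The point is that the ignore-list consolidation that follows in the real code ("we can leave out layer types if none of the instances are quantized") would otherwise drop every ignored Linear entry when only attention (kv cache) modules are quantized:

    # Hypothetical sketch: only attention modules are quantized (kv cache),
    # and the user ignored some Linear layers. Names are illustrative only.
    quantization_type_names = {"LlamaAttention"}
    ignore = ["lm_head", "model.layers.0.mlp.down_proj"]

    # The check added by this patch: treat Linear as a quantized type so that
    # Linear entries in the ignore list survive consolidation.
    if (
        len(quantization_type_names) == 1
        and "attention" in list(quantization_type_names)[0].lower()
    ):
        quantization_type_names.add("Linear")

    # Simplified stand-in for the consolidation step (not the real logic):
    def layer_type(name: str) -> str:
        # hypothetical: assume every ignored entry above is a Linear layer
        return "Linear"

    consolidated_ignore = [
        name for name in ignore if layer_type(name) in quantization_type_names
    ]
    # Without the added check, consolidated_ignore would be [], and the ignored
    # Linear layers would be lost from the serialized config.
    print(consolidated_ignore)  # ['lm_head', 'model.layers.0.mlp.down_proj']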