Fix the condition for asymmetric quantization #225

Draft · wants to merge 2 commits into base: main
@@ -387,7 +387,15 @@ def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
         if is_module_quantized(submodule):
             if submodule.quantization_scheme.weights is not None:
                 name = fix_fsdp_module_name(name)
-                quantized_modules_to_args[name] = submodule.quantization_scheme.weights
+                quantized_modules_to_args[name] = (
+                    submodule.quantization_scheme.weights,
+                )
+                if submodule.quantization_scheme.input_activations is not None:
+                    weight_args = quantized_modules_to_args.get(name)[0]
+                    quantized_modules_to_args[name] = (
+                        weight_args,
+                        submodule.quantization_scheme.input_activations,
+                    )
 
     return quantized_modules_to_args

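In effect, `quantized_modules_to_args` now maps each quantized module name to a tuple instead of a bare `QuantizationArgs`: `(weight_args,)` for weight-only schemes, and `(weight_args, input_activation_args)` when input activations are quantized as well. This is why the compressor below indexes `[0]` and `[1]` into the scheme entry. A minimal sketch of the new shape (the module names and the `DummyArgs` stand-in are illustrative, not from the PR):

import
from typing import NamedTuple

# Hypothetical stand-in for compressed-tensors' QuantizationArgs, used only
# to illustrate the tuple layout this PR introduces.
class DummyArgs(NamedTuple):
    symmetric: bool

weight_args = DummyArgs(symmetric=True)
input_args = DummyArgs(symmetric=False)

# Weight-only scheme: the value is now a one-element tuple, not a bare args object.
quantized_modules_to_args = {"model.layers.0.q_proj": (weight_args,)}

# Weight + input-activation scheme: a two-element tuple.
quantized_modules_to_args["model.layers.0.k_proj"] = (weight_args, input_args)

# Consumers index into the tuple instead of using the value directly.
for name, args in quantized_modules_to_args.items():
    w_args = args[0]                             # always present
    a_args = args[1] if len(args) > 1 else None  # only with activation quantization
    print(name, w_args.symmetric, getattr(a_args, "symmetric", None))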
17 changes: 15 additions & 2 deletions src/compressed_tensors/compressors/quantized_compressors/base.py
@@ -77,19 +77,24 @@ def compress(
         """
         compressed_dict = {}
         weight_suffix = ".weight"
+        input_zp_suffix = ".input_zero_point"
+        weight_zp_suffix = ".weight_zero_point"
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
 
         for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
+            weight_zp = name.endswith(weight_zp_suffix)
+            input_zp = name.endswith(input_zp_suffix)
+
             if name.endswith(weight_suffix):
                 prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                 g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
                 if scale is not None:
                     # weight is quantized, compress it
-                    quant_args = names_to_scheme[prefix]
+                    quant_args = names_to_scheme[prefix][0]
                     compressed_data = self.compress_weight(
                         weight=value,
                         scale=scale,
@@ -102,7 +107,15 @@ def compress(
                         compressed_dict[merge_names(prefix, key)] = value
                 else:
                     compressed_dict[name] = value.to("cpu")
-            elif name.endswith("zero_point") and torch.all(value == 0):
+            elif (
+                weight_zp
+                and names_to_scheme.get(name[: -(len(weight_zp_suffix))])[0].symmetric
+            ):
+                continue
+            elif (
+                input_zp
+                and names_to_scheme.get(name[: -(len(input_zp_suffix))])[1].symmetric
+            ):
                 continue
             elif name.endswith("g_idx") and torch.any(value <= -1):
                 continue
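The fixed condition decides whether a zero point can be omitted from the checkpoint based on the scheme's declared `symmetric` flag rather than on `torch.all(value == 0)`. Under the old test, an asymmetric zero point that happened to be all zeros was silently dropped, and weight and input-activation zero points were not distinguished. A self-contained sketch of the new decision logic (the helper name is hypothetical, and `DummyArgs` again stands in for `QuantizationArgs`):

from collections import namedtuple
import torch

# Hypothetical stand-in for QuantizationArgs, as in the earlier sketch.
DummyArgs = namedtuple("DummyArgs", "symmetric")

def should_skip_zero_point(name, value, names_to_scheme) -> bool:
    """Return True when a zero point need not be serialized.

    Mirrors the fixed condition: a weight zero point consults index 0 of the
    (weight_args, input_activation_args) tuple, an input zero point index 1;
    the tensor's runtime values (`value`) are deliberately not consulted.
    """
    for suffix, idx in ((".weight_zero_point", 0), (".input_zero_point", 1)):
        if name.endswith(suffix):
            scheme = names_to_scheme.get(name[: -len(suffix)])
            # Guard against missing or weight-only schemes before indexing.
            return scheme is not None and len(scheme) > idx and scheme[idx].symmetric
    return False

# A symmetric weight zero point is skipped; an asymmetric input zero point is
# kept even when it is currently all zeros.
names_to_scheme = {"layer": (DummyArgs(symmetric=True), DummyArgs(symmetric=False))}
print(should_skip_zero_point("layer.weight_zero_point", torch.zeros(1), names_to_scheme))  # True
print(should_skip_zero_point("layer.input_zero_point", torch.zeros(1), names_to_scheme))   # False

Note that the sketch guards the tuple lookup before indexing, whereas the diff's `names_to_scheme.get(...)[...]` assumes the prefix is always present; the guard is a defensive choice in the sketch, not part of the PR.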