From a460b70b1f0cf5365c275d094b9615d1a158cc87 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 26 Sep 2024 13:45:29 +0000 Subject: [PATCH] Update folder structure Move tests --- .../compressors/__init__.py | 18 +-- src/compressed_tensors/compressors/base.py | 6 +- src/compressed_tensors/compressors/helpers.py | 2 +- .../compressors/model_compressors/__init__.py | 17 ++ .../model_compressor.py | 10 +- .../quantized_compressors/__init__.py | 18 +++ .../compressors/quantized_compressors/base.py | 146 ++++++++++++++++++ .../naive_quantized.py | 10 +- .../pack_quantized.py | 2 +- .../sparse_compressors/__init__.py | 18 +++ .../compressors/sparse_compressors/base.py | 110 +++++++++++++ .../{ => sparse_compressors}/dense.py | 0 .../sparse_bitmask.py | 2 +- .../sparse_quantized_compressors/__init__.py | 16 ++ .../marlin_24.py | 0 .../model_compressors/__init__.py | 13 ++ .../test_model_compressor.py | 1 + .../quantized_compressors/__init__.py | 13 ++ .../test_fp8_quant.py | 0 .../test_int_quant.py | 0 .../test_pack_quant.py | 2 +- .../sparse_compressors/__init__.py | 13 ++ .../{ => sparse_compressors}/test_bitmask.py | 0 .../sparse_quantized_compressors/__init__.py | 13 ++ .../test_marlin_24.py | 0 25 files changed, 401 insertions(+), 29 deletions(-) create mode 100644 src/compressed_tensors/compressors/model_compressors/__init__.py rename src/compressed_tensors/compressors/{ => model_compressors}/model_compressor.py (98%) create mode 100644 src/compressed_tensors/compressors/quantized_compressors/__init__.py create mode 100644 src/compressed_tensors/compressors/quantized_compressors/base.py rename src/compressed_tensors/compressors/{ => quantized_compressors}/naive_quantized.py (93%) rename src/compressed_tensors/compressors/{ => quantized_compressors}/pack_quantized.py (99%) create mode 100644 src/compressed_tensors/compressors/sparse_compressors/__init__.py create mode 100644 src/compressed_tensors/compressors/sparse_compressors/base.py rename 
src/compressed_tensors/compressors/{ => sparse_compressors}/dense.py (100%) rename src/compressed_tensors/compressors/{ => sparse_compressors}/sparse_bitmask.py (98%) create mode 100644 src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py rename src/compressed_tensors/compressors/{ => sparse_quantized_compressors}/marlin_24.py (100%) create mode 100644 tests/test_compressors/model_compressors/__init__.py rename tests/test_compressors/{ => model_compressors}/test_model_compressor.py (97%) create mode 100644 tests/test_compressors/quantized_compressors/__init__.py rename tests/test_compressors/{ => quantized_compressors}/test_fp8_quant.py (100%) rename tests/test_compressors/{ => quantized_compressors}/test_int_quant.py (100%) rename tests/test_compressors/{ => quantized_compressors}/test_pack_quant.py (99%) create mode 100644 tests/test_compressors/sparse_compressors/__init__.py rename tests/test_compressors/{ => sparse_compressors}/test_bitmask.py (100%) create mode 100644 tests/test_compressors/sparse_quantized_compressors/__init__.py rename tests/test_compressors/{ => sparse_quantized_compressors}/test_marlin_24.py (100%) diff --git a/src/compressed_tensors/compressors/__init__.py b/src/compressed_tensors/compressors/__init__.py index 21b20589..138e3899 100644 --- a/src/compressed_tensors/compressors/__init__.py +++ b/src/compressed_tensors/compressors/__init__.py @@ -14,15 +14,9 @@ # flake8: noqa -from .base import BaseCompressor -from .dense import DenseCompressor -from .helpers import load_compressed, save_compressed, save_compressed_model -from .marlin_24 import Marlin24Compressor -from .model_compressor import ModelCompressor, map_modules_to_quant_args -from .naive_quantized import ( - FloatQuantizationCompressor, - IntQuantizationCompressor, - QuantizationCompressor, -) -from .pack_quantized import PackedQuantizationCompressor -from .sparse_bitmask import BitmaskCompressor, BitmaskTensor +from .base import * +from .helpers import * 
+from .model_compressors import * +from .quantized_compressors import * +from .sparse_compressors import * +from .sparse_quantized_compressors import * diff --git a/src/compressed_tensors/compressors/base.py b/src/compressed_tensors/compressors/base.py index f63cab37..ee751053 100644 --- a/src/compressed_tensors/compressors/base.py +++ b/src/compressed_tensors/compressors/base.py @@ -37,18 +37,18 @@ class BaseCompressor(RegistryMixin, ABC): Model Load Lifecycle (run_compressed=False): - ModelCompressor.decompress() - apply_quantization_config() - - Compressor.decompress() + - BaseCompressor.decompress() Model Save Lifecycle: - ModelCompressor.compress() - - Compressor.compress() + - BaseCompressor.compress() Module Lifecycle (run_compressed=True): - apply_quantization_config() - compressed_module = CompressedLinear(module) - initialize_module_for_quantization() - - Compressor.compression_param_info() + - BaseCompressor.compression_param_info() - register_parameters() - compressed_module.forward() -compressed_module.decompress() diff --git a/src/compressed_tensors/compressors/helpers.py b/src/compressed_tensors/compressors/helpers.py index 2753621b..7b03a9a1 100644 --- a/src/compressed_tensors/compressors/helpers.py +++ b/src/compressed_tensors/compressors/helpers.py @@ -16,7 +16,7 @@ from typing import Dict, Generator, Optional, Tuple, Union import torch -from compressed_tensors.compressors.base import BaseCompressor +from compressed_tensors.compressors import BaseCompressor from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig from compressed_tensors.utils.safetensors_load import get_weight_mappings from safetensors import safe_open diff --git a/src/compressed_tensors/compressors/model_compressors/__init__.py b/src/compressed_tensors/compressors/model_compressors/__init__.py new file mode 100644 index 00000000..d9cfa852 --- /dev/null +++ b/src/compressed_tensors/compressors/model_compressors/__init__.py @@ -0,0 +1,17 @@ +# Copyright 
(c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# flake8: noqa + + +from .model_compressor import * diff --git a/src/compressed_tensors/compressors/model_compressor.py b/src/compressed_tensors/compressors/model_compressors/model_compressor.py similarity index 98% rename from src/compressed_tensors/compressors/model_compressor.py rename to src/compressed_tensors/compressors/model_compressors/model_compressor.py index ac15fdaa..e5725189 100644 --- a/src/compressed_tensors/compressors/model_compressor.py +++ b/src/compressed_tensors/compressors/model_compressors/model_compressor.py @@ -30,7 +30,7 @@ QUANTIZATION_METHOD_NAME, SPARSITY_CONFIG_NAME, ) -from compressed_tensors.compressors.base import BaseCompressor +from compressed_tensors.compressors.base import BaseCompressor from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig from compressed_tensors.quantization import ( DEFAULT_QUANTIZATION_METHOD, @@ -247,11 +247,11 @@ def __init__( self.sparsity_config = None if sparsity_config is not None: - self.sparsity_compressor = BaseCompressor.load_from_registry( + self.sparsity_compressor = BaseCompressor.load_from_registry( sparsity_config.format, config=sparsity_config ) if quantization_config is not None: - self.quantization_compressor = BaseCompressor.load_from_registry( + self.quantization_compressor = BaseCompressor.load_from_registry( quantization_config.format, config=quantization_config ) @@ 
-262,7 +262,7 @@ def compress( Compresses a dense state dict or model with sparsity and/or quantization :param model: uncompressed model to compress - :param state_dict: optional uncompressed state_dict to insert into model + :param state_dict: optional uncompressed state_dict to insert into model :return: compressed state dict """ if state_dict is None: @@ -393,4 +393,4 @@ def new_dtype_byte_size(dtype): if bit_search is None: raise ValueError(f"`dtype` is not a valid dtype: {dtype}.") bit_size = int(bit_search.groups()[0]) - return bit_size // 8 + return bit_size // 8 \ No newline at end of file diff --git a/src/compressed_tensors/compressors/quantized_compressors/__init__.py b/src/compressed_tensors/compressors/quantized_compressors/__init__.py new file mode 100644 index 00000000..51e8b8e2 --- /dev/null +++ b/src/compressed_tensors/compressors/quantized_compressors/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# flake8: noqa + +from .base import * +from .naive_quantized import * +from .pack_quantized import * diff --git a/src/compressed_tensors/compressors/quantized_compressors/base.py b/src/compressed_tensors/compressors/quantized_compressors/base.py new file mode 100644 index 00000000..67065aa4 --- /dev/null +++ b/src/compressed_tensors/compressors/quantized_compressors/base.py @@ -0,0 +1,146 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import Dict, Generator, Tuple + +import torch +from compressed_tensors.compressors.base import BaseCompressor +from compressed_tensors.quantization import QuantizationArgs +from compressed_tensors.utils import get_nested_weight_mappings, merge_names +from safetensors import safe_open +from torch import Tensor +from tqdm import tqdm + + +_LOGGER: logging.Logger = logging.getLogger(__name__) + +__all__ = ["BaseQuantizationCompressor"] + + +class BaseQuantizationCompressor(BaseCompressor): + """ + Base class representing a quant compression algorithm. Each child class should + implement compression_param_info, compress_weight and decompress_weight. + + Compressors support compressing/decompressing a full module state dict or a single + quantized PyTorch leaf module. 
+ + Model Load Lifecycle (run_compressed=False): + - ModelCompressor.decompress() + - apply_quantization_config() + - BaseQuantizationCompressor.decompress() + - BaseQuantizationCompressor.decompress_weight() + + Model Save Lifecycle: + - ModelCompressor.compress() + - BaseQuantizationCompressor.compress() + - BaseQuantizationCompressor.compress_weight() + + Module Lifecycle (run_compressed=True): + - apply_quantization_config() + - compressed_module = CompressedLinear(module) + - initialize_module_for_quantization() + - BaseQuantizationCompressor.compression_param_info() + - register_parameters() + - compressed_module.forward() + -compressed_module.decompress() + + + :param config: config specifying compression parameters + """ + + def compress( + self, + model_state: Dict[str, Tensor], + names_to_scheme: Dict[str, QuantizationArgs], + **kwargs, + ) -> Dict[str, Tensor]: + """ + Compresses a dense state dict + + :param model_state: state dict of uncompressed model + :param names_to_scheme: quantization args for each quantized weight, needed for + quantize function to calculate bit depth + :return: compressed state dict + """ + compressed_dict = {} + weight_suffix = ".weight" + _LOGGER.debug( + f"Compressing model with {len(model_state)} parameterized layers..." 
+ ) + + for name, value in tqdm(model_state.items(), desc="Quantized Compression"): + if name.endswith(weight_suffix): + prefix = name[: -(len(weight_suffix))] + scale = model_state.get(merge_names(prefix, "weight_scale"), None) + zp = model_state.get(merge_names(prefix, "weight_zero_point"), None) + g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None) + if scale is not None: + # weight is quantized, compress it + quant_args = names_to_scheme[prefix] + compressed_data = self.compress_weight( + weight=value, + scale=scale, + zero_point=zp, + g_idx=g_idx, + quantization_args=quant_args, + device="cpu", + ) + for key, compressed_value in compressed_data.items(): + compressed_dict[merge_names(prefix, key)] = compressed_value + else: + compressed_dict[name] = value.to("cpu") + elif name.endswith("zero_point") and torch.all(value == 0): + continue + elif name.endswith("g_idx") and torch.any(value <= -1): + continue + else: + compressed_dict[name] = value.to("cpu") + + return compressed_dict + + def decompress( + self, + path_to_model_or_tensors: str, + names_to_scheme: Dict[str, QuantizationArgs], + device: str = "cpu", + ) -> Generator[Tuple[str, Tensor], None, None]: + """ + Reads a compressed state dict located at path_to_model_or_tensors + and returns a generator for sequentially decompressing back to a + dense state dict + + :param path_to_model_or_tensors: path to compressed safetensors model (directory + with one or more safetensors files) or compressed tensors file + :param names_to_scheme: quantization args for each quantized weight + :param device: optional device to load intermediate weights into + :return: iterator for generating decompressed weights + """ + weight_mappings = get_nested_weight_mappings( + path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES + ) + for weight_name in weight_mappings.keys(): + weight_data = {} + for param_name, safe_path in weight_mappings[weight_name].items(): + full_name = merge_names(weight_name, param_name) + with safe_open(safe_path, framework="pt", 
device=device) as f: + weight_data[param_name] = f.get_tensor(full_name) + + if "weight_scale" in weight_data: + quant_args = names_to_scheme[weight_name] + decompressed = self.decompress_weight( + compressed_data=weight_data, quantization_args=quant_args + ) + yield merge_names(weight_name, "weight"), decompressed diff --git a/src/compressed_tensors/compressors/naive_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py similarity index 93% rename from src/compressed_tensors/compressors/naive_quantized.py rename to src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py index acc09932..0267aca4 100644 --- a/src/compressed_tensors/compressors/naive_quantized.py +++ b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py @@ -16,7 +16,7 @@ import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_quantization_compressor import ( +from compressed_tensors.compressors.quantized_compressors.base import ( BaseQuantizationCompressor, ) from compressed_tensors.config import CompressionFormat @@ -27,14 +27,14 @@ __all__ = [ - "QuantizationCompressor", + "NaiveQuantizationCompressor", "IntQuantizationCompressor", "FloatQuantizationCompressor", ] @BaseCompressor.register(name=CompressionFormat.naive_quantized.value) -class QuantizationCompressor(BaseQuantizationCompressor): +class NaiveQuantizationCompressor(BaseQuantizationCompressor): """ Implements naive compression for quantized models. 
Weight of each quantized layer is converted from its original float type to the closest Pytorch @@ -123,7 +123,7 @@ def decompress_weight( @BaseCompressor.register(name=CompressionFormat.int_quantized.value) -class IntQuantizationCompressor(QuantizationCompressor): +class IntQuantizationCompressor(NaiveQuantizationCompressor): """ Alias for integer quantized models """ @@ -132,7 +132,7 @@ class IntQuantizationCompressor(QuantizationCompressor): @BaseCompressor.register(name=CompressionFormat.float_quantized.value) -class FloatQuantizationCompressor(QuantizationCompressor): +class FloatQuantizationCompressor(NaiveQuantizationCompressor): """ Alias for fp quantized models """ diff --git a/src/compressed_tensors/compressors/pack_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py similarity index 99% rename from src/compressed_tensors/compressors/pack_quantized.py rename to src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py index 9d63e264..ce9f0a57 100644 --- a/src/compressed_tensors/compressors/pack_quantized.py +++ b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py @@ -17,7 +17,7 @@ import numpy as np import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_quantization_compressor import ( +from compressed_tensors.compressors.quantized_compressors.base import ( BaseQuantizationCompressor, ) from compressed_tensors.config import CompressionFormat diff --git a/src/compressed_tensors/compressors/sparse_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_compressors/__init__.py new file mode 100644 index 00000000..de4fd887 --- /dev/null +++ b/src/compressed_tensors/compressors/sparse_compressors/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# flake8: noqa + +from .base import * +from .dense import * +from .sparse_bitmask import * diff --git a/src/compressed_tensors/compressors/sparse_compressors/base.py b/src/compressed_tensors/compressors/sparse_compressors/base.py new file mode 100644 index 00000000..308ddab2 --- /dev/null +++ b/src/compressed_tensors/compressors/sparse_compressors/base.py @@ -0,0 +1,110 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +from typing import Dict, Generator, Tuple + +from compressed_tensors.compressors.base import BaseCompressor +from compressed_tensors.utils import get_nested_weight_mappings, merge_names +from safetensors import safe_open +from torch import Tensor +from tqdm import tqdm + + +__all__ = ["BaseSparseCompressor"] + +_LOGGER: logging.Logger = logging.getLogger(__name__) + + +class BaseSparseCompressor(BaseCompressor): + """ + Base class representing a sparse compression algorithm. Each child class should + implement compression_param_info, compress_weight and decompress_weight. + + Compressors support compressing/decompressing a full module state dict or a single + quantized PyTorch leaf module. + + Model Load Lifecycle (run_compressed=False): + - ModelCompressor.decompress() + - apply_quantization_config() + - BaseSparseCompressor.decompress() + - BaseSparseCompressor.decompress_weight() + + Model Save Lifecycle: + - ModelCompressor.compress() + - BaseSparseCompressor.compress() + - BaseSparseCompressor.compress_weight() + + Module Lifecycle (run_compressed=True): + - apply_quantization_config() + - compressed_module = CompressedLinear(module) + - initialize_module_for_quantization() + - BaseSparseCompressor.compression_param_info() + - register_parameters() + - compressed_module.forward() + -compressed_module.decompress() + + + :param config: config specifying compression parameters + """ + + def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]: + """ + Compresses a dense state dict using bitmask compression + + :param model_state: state dict of uncompressed model + :return: compressed state dict + """ + compressed_dict = {} + _LOGGER.debug( + f"Compressing model with {len(model_state)} parameterized layers..." 
+ ) + for name, value in tqdm(model_state.items(), desc="Compressing model"): + compression_data = self.compress_weight(name, value) + for key in compression_data.keys(): + if key in compressed_dict: + _LOGGER.warning( + f"Expected all compressed state_dict keys to be unique, but " + f"found an existing entry for {key}. The existing entry will " + "be replaced." + ) + + compressed_dict.update(compression_data) + + return compressed_dict + + def decompress( + self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs + ) -> Generator[Tuple[str, Tensor], None, None]: + """ + Reads a bitmask compressed state dict located + at path_to_model_or_tensors and returns a generator + for sequentially decompressing back to a dense state dict + + :param path_to_model_or_tensors: path to compressed safetensors model (directory + with one or more safetensors files) or compressed tensors file + :param device: device to load decompressed weights onto + :return: iterator for generating decompressed weights + """ + weight_mappings = get_nested_weight_mappings( + path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES + ) + for weight_name in weight_mappings.keys(): + weight_data = {} + for param_name, safe_path in weight_mappings[weight_name].items(): + full_name = merge_names(weight_name, param_name) + with safe_open(safe_path, framework="pt", device=device) as f: + weight_data[param_name] = f.get_tensor(full_name) + decompressed = self.decompress_weight(weight_data) + yield weight_name, decompressed diff --git a/src/compressed_tensors/compressors/dense.py b/src/compressed_tensors/compressors/sparse_compressors/dense.py similarity index 100% rename from src/compressed_tensors/compressors/dense.py rename to src/compressed_tensors/compressors/sparse_compressors/dense.py diff --git a/src/compressed_tensors/compressors/sparse_bitmask.py b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py similarity index 98% rename from src/compressed_tensors/compressors/sparse_bitmask.py 
rename to src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py index 63124163..a950aa64 100644 --- a/src/compressed_tensors/compressors/sparse_bitmask.py +++ b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py @@ -17,7 +17,7 @@ import numpy import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_sparsity_compressor import BaseSparseCompressor +from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor from compressed_tensors.config import CompressionFormat from compressed_tensors.utils import merge_names from torch import Tensor diff --git a/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py new file mode 100644 index 00000000..c3615f06 --- /dev/null +++ b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# flake8: noqa + +from .marlin_24 import Marlin24Compressor diff --git a/src/compressed_tensors/compressors/marlin_24.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py similarity index 100% rename from src/compressed_tensors/compressors/marlin_24.py rename to src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py diff --git a/tests/test_compressors/model_compressors/__init__.py b/tests/test_compressors/model_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/model_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/test_compressors/test_model_compressor.py b/tests/test_compressors/model_compressors/test_model_compressor.py similarity index 97% rename from tests/test_compressors/test_model_compressor.py rename to tests/test_compressors/model_compressors/test_model_compressor.py index 3f6940a9..00c1641f 100644 --- a/tests/test_compressors/test_model_compressor.py +++ b/tests/test_compressors/model_compressors/test_model_compressor.py @@ -19,6 +19,7 @@ from compressed_tensors.config.base import SparsityCompressionConfig from compressed_tensors.quantization.quant_config import QuantizationConfig from tests.testing_utils import requires_hf_quantizer +from compressed_tensors.compressors.model_compressors import ModelCompressor def sparsity_config(): diff --git a/tests/test_compressors/quantized_compressors/__init__.py b/tests/test_compressors/quantized_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/quantized_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/test_compressors/test_fp8_quant.py b/tests/test_compressors/quantized_compressors/test_fp8_quant.py similarity index 100% rename from tests/test_compressors/test_fp8_quant.py rename to tests/test_compressors/quantized_compressors/test_fp8_quant.py diff --git a/tests/test_compressors/test_int_quant.py b/tests/test_compressors/quantized_compressors/test_int_quant.py similarity index 100% rename from tests/test_compressors/test_int_quant.py rename to tests/test_compressors/quantized_compressors/test_int_quant.py diff --git a/tests/test_compressors/test_pack_quant.py b/tests/test_compressors/quantized_compressors/test_pack_quant.py similarity index 99% rename from tests/test_compressors/test_pack_quant.py rename to tests/test_compressors/quantized_compressors/test_pack_quant.py index bef8adc3..496e8304 100644 --- a/tests/test_compressors/test_pack_quant.py +++ b/tests/test_compressors/quantized_compressors/test_pack_quant.py @@ -20,7 +20,7 @@ import pytest import torch from compressed_tensors import PackedQuantizationCompressor -from compressed_tensors.compressors.pack_quantized import ( +from compressed_tensors.compressors.quantized_compressors.pack_quantized import ( pack_to_int32, unpack_from_int32, ) diff --git a/tests/test_compressors/sparse_compressors/__init__.py b/tests/test_compressors/sparse_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/sparse_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_compressors/test_bitmask.py b/tests/test_compressors/sparse_compressors/test_bitmask.py similarity index 100% rename from tests/test_compressors/test_bitmask.py rename to tests/test_compressors/sparse_compressors/test_bitmask.py diff --git a/tests/test_compressors/sparse_quantized_compressors/__init__.py b/tests/test_compressors/sparse_quantized_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/sparse_quantized_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_compressors/test_marlin_24.py b/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py similarity index 100% rename from tests/test_compressors/test_marlin_24.py rename to tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py