diff --git a/src/compressed_tensors/compressors/__init__.py b/src/compressed_tensors/compressors/__init__.py index 21b20589..138e3899 100644 --- a/src/compressed_tensors/compressors/__init__.py +++ b/src/compressed_tensors/compressors/__init__.py @@ -14,15 +14,9 @@ # flake8: noqa -from .base import BaseCompressor -from .dense import DenseCompressor -from .helpers import load_compressed, save_compressed, save_compressed_model -from .marlin_24 import Marlin24Compressor -from .model_compressor import ModelCompressor, map_modules_to_quant_args -from .naive_quantized import ( - FloatQuantizationCompressor, - IntQuantizationCompressor, - QuantizationCompressor, -) -from .pack_quantized import PackedQuantizationCompressor -from .sparse_bitmask import BitmaskCompressor, BitmaskTensor +from .base import * +from .helpers import * +from .model_compressors import * +from .quantized_compressors import * +from .sparse_compressors import * +from .sparse_quantized_compressors import * diff --git a/src/compressed_tensors/compressors/base.py b/src/compressed_tensors/compressors/base.py index f63cab37..ee751053 100644 --- a/src/compressed_tensors/compressors/base.py +++ b/src/compressed_tensors/compressors/base.py @@ -37,18 +37,18 @@ class BaseCompressor(RegistryMixin, ABC): Model Load Lifecycle (run_compressed=False): - ModelCompressor.decompress() - apply_quantization_config() - - Compressor.decompress() + - BaseCompressor.decompress() Model Save Lifecycle: - ModelCompressor.compress() - - Compressor.compress() + - BaseCompressor.compress() Module Lifecycle (run_compressed=True): - apply_quantization_config() - compressed_module = CompressedLinear(module) - initialize_module_for_quantization() - - Compressor.compression_param_info() + - BaseCompressor.compression_param_info() - register_parameters() - compressed_module.forward() -compressed_module.decompress() diff --git a/src/compressed_tensors/compressors/helpers.py b/src/compressed_tensors/compressors/helpers.py index 
2753621b..7b03a9a1 100644 --- a/src/compressed_tensors/compressors/helpers.py +++ b/src/compressed_tensors/compressors/helpers.py @@ -16,7 +16,7 @@ from typing import Dict, Generator, Optional, Tuple, Union import torch -from compressed_tensors.compressors.base import BaseCompressor +from compressed_tensors.compressors import BaseCompressor from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig from compressed_tensors.utils.safetensors_load import get_weight_mappings from safetensors import safe_open diff --git a/src/compressed_tensors/compressors/model_compressors/__init__.py b/src/compressed_tensors/compressors/model_compressors/__init__.py new file mode 100644 index 00000000..d9cfa852 --- /dev/null +++ b/src/compressed_tensors/compressors/model_compressors/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# flake8: noqa + + +from .model_compressor import * diff --git a/src/compressed_tensors/compressors/model_compressor.py b/src/compressed_tensors/compressors/model_compressors/model_compressor.py similarity index 100% rename from src/compressed_tensors/compressors/model_compressor.py rename to src/compressed_tensors/compressors/model_compressors/model_compressor.py diff --git a/src/compressed_tensors/compressors/quantized_compressors/__init__.py b/src/compressed_tensors/compressors/quantized_compressors/__init__.py new file mode 100644 index 00000000..51e8b8e2 --- /dev/null +++ b/src/compressed_tensors/compressors/quantized_compressors/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# flake8: noqa + +from .base import * +from .naive_quantized import * +from .pack_quantized import * diff --git a/src/compressed_tensors/compressors/base_quantization_compressor.py b/src/compressed_tensors/compressors/quantized_compressors/base.py similarity index 94% rename from src/compressed_tensors/compressors/base_quantization_compressor.py rename to src/compressed_tensors/compressors/quantized_compressors/base.py index 563ecf56..67065aa4 100644 --- a/src/compressed_tensors/compressors/base_quantization_compressor.py +++ b/src/compressed_tensors/compressors/quantized_compressors/base.py @@ -24,10 +24,10 @@ from tqdm import tqdm -__all__ = ["BaseQuantizationCompressor"] - _LOGGER: logging.Logger = logging.getLogger(__name__) +__all__ = ["BaseQuantizationCompressor"] + class BaseQuantizationCompressor(BaseCompressor): """ @@ -40,22 +40,22 @@ class BaseQuantizationCompressor(BaseCompressor): Model Load Lifecycle (run_compressed=False): - ModelCompressor.decompress() - apply_quantization_config() - - Compressor.decompress() - - Compressor.decompress_weight() + - BaseQuantizationCompressor.decompress() + - BaseQuantizationCompressor.decompress_weight() Model Save Lifecycle: - ModelCompressor.compress() - - Compressor.compress() - - Compressor.compress_weight() + - BaseQuantizationCompressor.compress() + - BaseQuantizationCompressor.compress_weight() Module Lifecycle (run_compressed=True): - apply_quantization_config() - compressed_module = CompressedLinear(module) - initialize_module_for_quantization() - - Compressor.compression_param_info() + - BaseQuantizationCompressor.compression_param_info() - register_parameters() - compressed_module.forward() - - compressed_module.decompress() + -compressed_module.decompress() :param config: config specifying compression parameters diff --git a/src/compressed_tensors/compressors/naive_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py similarity index 93% rename from 
src/compressed_tensors/compressors/naive_quantized.py rename to src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py index acc09932..0267aca4 100644 --- a/src/compressed_tensors/compressors/naive_quantized.py +++ b/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py @@ -16,7 +16,7 @@ import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_quantization_compressor import ( +from compressed_tensors.compressors.quantized_compressors.base import ( BaseQuantizationCompressor, ) from compressed_tensors.config import CompressionFormat @@ -27,14 +27,14 @@ __all__ = [ - "QuantizationCompressor", + "NaiveQuantizationCompressor", "IntQuantizationCompressor", "FloatQuantizationCompressor", ] @BaseCompressor.register(name=CompressionFormat.naive_quantized.value) -class QuantizationCompressor(BaseQuantizationCompressor): +class NaiveQuantizationCompressor(BaseQuantizationCompressor): """ Implements naive compression for quantized models. 
Weight of each quantized layer is converted from its original float type to the closest Pytorch @@ -123,7 +123,7 @@ def decompress_weight( @BaseCompressor.register(name=CompressionFormat.int_quantized.value) -class IntQuantizationCompressor(QuantizationCompressor): +class IntQuantizationCompressor(NaiveQuantizationCompressor): """ Alias for integer quantized models """ @@ -132,7 +132,7 @@ class IntQuantizationCompressor(QuantizationCompressor): @BaseCompressor.register(name=CompressionFormat.float_quantized.value) -class FloatQuantizationCompressor(QuantizationCompressor): +class FloatQuantizationCompressor(NaiveQuantizationCompressor): """ Alias for fp quantized models """ diff --git a/src/compressed_tensors/compressors/pack_quantized.py b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py similarity index 99% rename from src/compressed_tensors/compressors/pack_quantized.py rename to src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py index 9d63e264..ce9f0a57 100644 --- a/src/compressed_tensors/compressors/pack_quantized.py +++ b/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py @@ -17,7 +17,7 @@ import numpy as np import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_quantization_compressor import ( +from compressed_tensors.compressors.quantized_compressors.base import ( BaseQuantizationCompressor, ) from compressed_tensors.config import CompressionFormat diff --git a/src/compressed_tensors/compressors/sparse_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_compressors/__init__.py new file mode 100644 index 00000000..de4fd887 --- /dev/null +++ b/src/compressed_tensors/compressors/sparse_compressors/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# flake8: noqa + +from .base import * +from .dense import * +from .sparse_bitmask import * diff --git a/src/compressed_tensors/compressors/base_sparsity_compressor.py b/src/compressed_tensors/compressors/sparse_compressors/base.py similarity index 92% rename from src/compressed_tensors/compressors/base_sparsity_compressor.py rename to src/compressed_tensors/compressors/sparse_compressors/base.py index f7ebbc2f..308ddab2 100644 --- a/src/compressed_tensors/compressors/base_sparsity_compressor.py +++ b/src/compressed_tensors/compressors/sparse_compressors/base.py @@ -38,22 +38,22 @@ class BaseSparseCompressor(BaseCompressor): Model Load Lifecycle (run_compressed=False): - ModelCompressor.decompress() - apply_quantization_config() - - Compressor.decompress() - - Compressor.decompress_weight() + - BaseSparseCompressor.decompress() + - BaseSparseCompressor.decompress_weight() Model Save Lifecycle: - ModelCompressor.compress() - - Compressor.compress() - - Compressor.compress_weight() + - BaseSparseCompressor.compress() + - BaseSparseCompressor.compress_weight() Module Lifecycle (run_compressed=True): - apply_quantization_config() - compressed_module = CompressedLinear(module) - initialize_module_for_quantization() - - Compressor.compression_param_info() + - BaseSparseCompressor.compression_param_info() - register_parameters() - compressed_module.forward() - - compressed_module.decompress() + -compressed_module.decompress() 
:param config: config specifying compression parameters diff --git a/src/compressed_tensors/compressors/dense.py b/src/compressed_tensors/compressors/sparse_compressors/dense.py similarity index 100% rename from src/compressed_tensors/compressors/dense.py rename to src/compressed_tensors/compressors/sparse_compressors/dense.py diff --git a/src/compressed_tensors/compressors/sparse_bitmask.py b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py similarity index 98% rename from src/compressed_tensors/compressors/sparse_bitmask.py rename to src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py index 63124163..a950aa64 100644 --- a/src/compressed_tensors/compressors/sparse_bitmask.py +++ b/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py @@ -17,7 +17,7 @@ import numpy import torch from compressed_tensors.compressors.base import BaseCompressor -from compressed_tensors.compressors.base_sparsity_compressor import BaseSparseCompressor +from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor from compressed_tensors.config import CompressionFormat from compressed_tensors.utils import merge_names from torch import Tensor diff --git a/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py new file mode 100644 index 00000000..c3615f06 --- /dev/null +++ b/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# flake8: noqa + +from .marlin_24 import Marlin24Compressor diff --git a/src/compressed_tensors/compressors/marlin_24.py b/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py similarity index 100% rename from src/compressed_tensors/compressors/marlin_24.py rename to src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py diff --git a/tests/test_compressors/model_compressors/__init__.py b/tests/test_compressors/model_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/model_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/test_compressors/test_model_compressor.py b/tests/test_compressors/model_compressors/test_model_compressor.py similarity index 100% rename from tests/test_compressors/test_model_compressor.py rename to tests/test_compressors/model_compressors/test_model_compressor.py diff --git a/tests/test_compressors/quantized_compressors/__init__.py b/tests/test_compressors/quantized_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/quantized_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/test_compressors/test_fp8_quant.py b/tests/test_compressors/quantized_compressors/test_fp8_quant.py similarity index 100% rename from tests/test_compressors/test_fp8_quant.py rename to tests/test_compressors/quantized_compressors/test_fp8_quant.py diff --git a/tests/test_compressors/test_int_quant.py b/tests/test_compressors/quantized_compressors/test_int_quant.py similarity index 100% rename from tests/test_compressors/test_int_quant.py rename to tests/test_compressors/quantized_compressors/test_int_quant.py diff --git a/tests/test_compressors/test_pack_quant.py b/tests/test_compressors/quantized_compressors/test_pack_quant.py similarity index 99% rename from tests/test_compressors/test_pack_quant.py rename to tests/test_compressors/quantized_compressors/test_pack_quant.py index bef8adc3..496e8304 100644 --- a/tests/test_compressors/test_pack_quant.py +++ b/tests/test_compressors/quantized_compressors/test_pack_quant.py @@ -20,7 +20,7 @@ import pytest import torch from compressed_tensors import PackedQuantizationCompressor -from compressed_tensors.compressors.pack_quantized import ( +from compressed_tensors.compressors.quantized_compressors.pack_quantized import ( pack_to_int32, unpack_from_int32, ) diff --git a/tests/test_compressors/sparse_compressors/__init__.py b/tests/test_compressors/sparse_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/sparse_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_compressors/test_bitmask.py b/tests/test_compressors/sparse_compressors/test_bitmask.py similarity index 100% rename from tests/test_compressors/test_bitmask.py rename to tests/test_compressors/sparse_compressors/test_bitmask.py diff --git a/tests/test_compressors/sparse_quantized_compressors/__init__.py b/tests/test_compressors/sparse_quantized_compressors/__init__.py new file mode 100644 index 00000000..0c44f887 --- /dev/null +++ b/tests/test_compressors/sparse_quantized_compressors/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_compressors/test_marlin_24.py b/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py similarity index 100% rename from tests/test_compressors/test_marlin_24.py rename to tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py