From fa6a48f1da6b47106912bcd25eba7171ba7cfec7 Mon Sep 17 00:00:00 2001 From: Sara Adkins Date: Thu, 11 Apr 2024 18:05:45 +0000 Subject: [PATCH] defining quantization config models --- src/sparsetensors/__init__.py | 1 + src/sparsetensors/quantization/__init__.py | 18 +++++ src/sparsetensors/quantization/quant_args.py | 63 +++++++++++++++++ .../quantization/quant_config.py | 68 +++++++++++++++++++ .../quantization/quant_scheme.py | 39 +++++++++++ 5 files changed, 189 insertions(+) create mode 100644 src/sparsetensors/quantization/__init__.py create mode 100644 src/sparsetensors/quantization/quant_args.py create mode 100644 src/sparsetensors/quantization/quant_config.py create mode 100644 src/sparsetensors/quantization/quant_scheme.py diff --git a/src/sparsetensors/__init__.py b/src/sparsetensors/__init__.py index 3eefa2c8..0833dd42 100644 --- a/src/sparsetensors/__init__.py +++ b/src/sparsetensors/__init__.py @@ -17,4 +17,5 @@ # flake8: noqa from .compressors import * from .config import * +from .quantization import QuantizationConfig, QuantizationStatus from .utils import * diff --git a/src/sparsetensors/quantization/__init__.py b/src/sparsetensors/quantization/__init__.py new file mode 100644 index 00000000..b53a328a --- /dev/null +++ b/src/sparsetensors/quantization/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from enum import Enum
from typing import Optional

from pydantic import BaseModel


__all__ = ["QuantizationType", "QuantizationStrategy", "QuantizationArgs"]


class QuantizationType(Enum):
    """
    Enum storing quantization type options (the numeric family the
    quantized values belong to)
    """

    INT = "int"
    FLOAT = "float"


class QuantizationStrategy(Enum):
    """
    Enum storing quantization strategy options (the granularity at which
    a scale/zero-point pair is applied)
    """

    TENSOR = "tensor"
    CHANNEL = "channel"
    GROUP = "group"
    BLOCK = "block"


class QuantizationArgs(BaseModel):
    """
    User facing arguments used to define a quantization config for weights or
    activations

    :param num_bits: quantization bit depth
    :param type: dtype to quantize to, either int or float
    :param symmetric: whether or not quantization scale is symmetric about zero-point
    :param strategy: string id determining the scope of scale/zero-point to apply
    :param group_size: group length to use for the group strategy; only
        meaningful when strategy is GROUP
    :param block_structure: 2d block structure to use for the block strategy, must be
        of the format "2x4", "8x16", etc.; only meaningful when strategy is BLOCK
    """

    num_bits: int = 8
    type: QuantizationType = QuantizationType.INT
    symmetric: bool = True
    strategy: QuantizationStrategy = QuantizationStrategy.TENSOR
    # group_size / block_structure default to None since they only apply to
    # their respective strategies
    group_size: Optional[int] = None
    block_structure: Optional[str] = None
from enum import Enum
from typing import Dict, List, Optional

from pydantic import BaseModel
from sparsetensors.quantization.quant_scheme import QuantizationScheme


__all__ = ["QuantizationStatus", "QuantizationConfig"]


class QuantizationStatus(Enum):
    """
    Enum storing the different states a quantized layer can be in

    Initialized: scale, zero points and observers have been attached to the layer but
    are set to dummy values (not yet calibrated)
    Calibration: scale and zero points have been calibrated through OBCQ or similar
    algorithm, observers are still attached
    Frozen: scale and zero points are finalized, observers have been deleted, weights
    are still in their original precision
    Compressed: weights have been converted to their target type or compressed to
    their closest approximation
    """

    INITIALIZED = "initialized"
    CALIBRATION = "calibration"
    FROZEN = "frozen"
    COMPRESSED = "compressed"


class QuantizationConfig(BaseModel):
    """
    Full configuration specifying how a model is quantized. Each quantized layer is
    mapped to a QuantizationScheme in config_groups.

    :param config_groups: dict of QuantizationSchemes specifying the quantization
        settings for each quantized layer
    :param quant_method: a constant used to differentiate sparseML quantization from
        other quantization configs
    :param format: specifies how the quantized model is stored on disk
    :param quantization_status: specifies the current status of all quantized layers.
        It is assumed all layers are in the same state.
    :param global_compression_ratio: optional informational config to report the model
        compression ratio achieved by the quantization config
    :param ignore: optional list of layers to ignore from config_groups. Layers in
        this list are not quantized even if they match up with a target in
        config_groups
    """

    config_groups: Dict[str, QuantizationScheme]
    quant_method: str = "sparseml"
    format: str = "fakequant"
    quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
    global_compression_ratio: Optional[float] = None
    ignore: Optional[List[str]] = None
from typing import List, Optional

from pydantic import BaseModel
from sparsetensors.quantization.quant_args import QuantizationArgs


__all__ = ["QuantizationScheme"]


class QuantizationScheme(BaseModel):
    """
    Groups the QuantizationArgs that describe how a set of target modules
    should have their weights, inputs and outputs quantized

    :param targets: list of modules the scheme applies to; entries may be layer
        names, layer types or a regular expression
    :param weights: quantization config for layer weights
    :param input_activations: quantization config for layer inputs
    :param output_activations: quantization config for layer outputs
    """

    # a module must match one of these targets for the scheme to apply to it
    targets: List[str]
    # each of the three quantization slots is optional; a None entry means that
    # tensor group is left unquantized
    weights: Optional[QuantizationArgs] = None
    input_activations: Optional[QuantizationArgs] = None
    output_activations: Optional[QuantizationArgs] = None