Commit

defining quantization config models
Sara Adkins committed Apr 11, 2024
1 parent f4837ed commit fa6a48f
Showing 5 changed files with 189 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/sparsetensors/__init__.py
@@ -17,4 +17,5 @@
# flake8: noqa
from .compressors import *
from .config import *
from .quantization import QuantizationConfig, QuantizationStatus
from .utils import *
18 changes: 18 additions & 0 deletions src/sparsetensors/quantization/__init__.py
@@ -0,0 +1,18 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# flake8: noqa
from .quant_args import *
from .quant_config import *
from .quant_scheme import *
63 changes: 63 additions & 0 deletions src/sparsetensors/quantization/quant_args.py
@@ -0,0 +1,63 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from enum import Enum
from typing import Optional

from pydantic import BaseModel


__all__ = ["QuantizationType", "QuantizationStrategy", "QuantizationArgs"]


class QuantizationType(Enum):
    """
    Enum storing quantization type options
    """

    INT = "int"
    FLOAT = "float"


class QuantizationStrategy(Enum):
    """
    Enum storing quantization strategy options
    """

    TENSOR = "tensor"
    CHANNEL = "channel"
    GROUP = "group"
    BLOCK = "block"


class QuantizationArgs(BaseModel):
    """
    User-facing arguments used to define a quantization config for weights or
    activations

    :param num_bits: quantization bit depth
    :param type: dtype to quantize to, either int or float
    :param symmetric: whether or not the quantization scale is symmetric about the
        zero point
    :param strategy: string id determining the scope of scale/zero-point to apply
    :param group_size: group length to use for the group strategy
    :param block_structure: 2d block structure to use for the block strategy, must be
        of the format "2x4", "8x16", etc.
    """

    num_bits: int = 8
    type: QuantizationType = QuantizationType.INT
    symmetric: bool = True
    strategy: QuantizationStrategy = QuantizationStrategy.TENSOR
    group_size: Optional[int] = None
    block_structure: Optional[str] = None
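As a minimal usage sketch (the values are hypothetical; pydantic fills the remaining fields from the defaults above), a 4-bit group-wise weight quantization could be declared as:

from sparsetensors.quantization.quant_args import (
    QuantizationArgs,
    QuantizationStrategy,
    QuantizationType,
)

# hypothetical values: asymmetric int4 quantization in groups of 128 elements
w4_args = QuantizationArgs(
    num_bits=4,
    type=QuantizationType.INT,
    symmetric=False,
    strategy=QuantizationStrategy.GROUP,
    group_size=128,
)
assert w4_args.strategy is QuantizationStrategy.GROUP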
68 changes: 68 additions & 0 deletions src/sparsetensors/quantization/quant_config.py
@@ -0,0 +1,68 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from enum import Enum
from typing import Dict, List, Optional

from pydantic import BaseModel
from sparsetensors.quantization.quant_scheme import QuantizationScheme


__all__ = ["QuantizationStatus", "QuantizationConfig"]


class QuantizationStatus(Enum):
    """
    Enum storing the different states a quantized layer can be in

    Initialized: scale, zero points and observers have been attached to the layer but
    are set to dummy values (not yet calibrated)
    Calibration: scale and zero points have been calibrated through OBCQ or a similar
    algorithm, observers are still attached
    Frozen: scale and zero points are finalized, observers have been deleted, weights
    are still in their original precision
    Compressed: weights have been converted to their target type or compressed to
    their closest approximation
    """

    INITIALIZED = "initialized"
    CALIBRATION = "calibration"
    FROZEN = "frozen"
    COMPRESSED = "compressed"
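A small sketch of how these states behave as enum values (standard Python Enum semantics; the string values are the ones defined above):

from sparsetensors.quantization import QuantizationStatus

# enum members round-trip through their string values, as when loading a config
status = QuantizationStatus("calibration")
assert status is QuantizationStatus.CALIBRATION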


class QuantizationConfig(BaseModel):
    """
    Full configuration specifying how a model is quantized. Each quantized layer is
    mapped to a QuantizationScheme in config_groups.

    :param config_groups: dict of QuantizationSchemes specifying the quantization
        settings for each quantized layer
    :param quant_method: a constant used to differentiate sparseML quantization from
        other quantization configs
    :param format: specifies how the quantized model is stored on disk
    :param quantization_status: specifies the current status of all quantized layers.
        It is assumed all layers are in the same state.
    :param global_compression_ratio: optional informational config to report the model
        compression ratio achieved by the quantization config
    :param ignore: optional list of layers to ignore from config_groups. Layers in
        this list are not quantized even if they match up with a target in
        config_groups
    """

    config_groups: Dict[str, QuantizationScheme]
    quant_method: str = "sparseml"
    format: str = "fakequant"
    quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
    global_compression_ratio: Optional[float] = None
    ignore: Optional[List[str]] = None
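A minimal end-to-end sketch (the layer names are hypothetical; QuantizationScheme is defined in quant_scheme.py below) of a config that quantizes all Linear layers to 8 bits while skipping lm_head:

from sparsetensors.quantization import QuantizationConfig
from sparsetensors.quantization.quant_args import QuantizationArgs
from sparsetensors.quantization.quant_scheme import QuantizationScheme

# hypothetical example: one config group covering every Linear layer
config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(num_bits=8),
        ),
    },
    ignore=["lm_head"],  # hypothetical layer name, excluded from quantization
)
print(config.json())  # pydantic serialization, e.g. for saving next to the weights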
39 changes: 39 additions & 0 deletions src/sparsetensors/quantization/quant_scheme.py
@@ -0,0 +1,39 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional

from pydantic import BaseModel
from sparsetensors.quantization.quant_args import QuantizationArgs


__all__ = ["QuantizationScheme"]


class QuantizationScheme(BaseModel):
    """
    Set of QuantizationArgs defining how the weights, inputs and outputs of a target
    list of modules should be quantized

    :param targets: list of modules to apply the QuantizationArgs to, can be layer
        names, layer types or a regular expression
    :param weights: quantization config for layer weights
    :param input_activations: quantization config for layer inputs
    :param output_activations: quantization config for layer outputs
    """

    targets: List[str]
    weights: Optional[QuantizationArgs] = None
    input_activations: Optional[QuantizationArgs] = None
    output_activations: Optional[QuantizationArgs] = None
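As a sketch (the module names are illustrative, not part of the API), a scheme that quantizes both the weights and the input activations of attention projections might look like:

from sparsetensors.quantization.quant_args import QuantizationArgs
from sparsetensors.quantization.quant_scheme import QuantizationScheme

# hypothetical example: 8-bit weights plus asymmetric 8-bit input activations
attn_scheme = QuantizationScheme(
    targets=["q_proj", "k_proj", "v_proj"],  # illustrative module names
    weights=QuantizationArgs(num_bits=8),
    input_activations=QuantizationArgs(num_bits=8, symmetric=False),
)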
