[Release 0.3.0] Basic Readme and user-facing pathways #30

Merged · 16 commits · Apr 25, 2024
Changes from 8 commits
97 changes: 97 additions & 0 deletions README.md
@@ -1 +1,98 @@
# compressed-tensors

This repository extends the [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. The `compressed-tensors` format supports multiple compression types to minimize disk space and facilitate tensor manipulation.

## Motivation

### Reduce disk space by saving sparse tensors in a compressed format

The compressed format stores the data much more efficiently by taking advantage of two properties of tensors:

- Sparse tensors -> a large fraction of entries are equal to zero.
- Quantized tensors -> values are stored in a low-precision representation.


### Introduce an elegant interface to save/load compressed tensors

The library gives the user the ability to compress and decompress tensors. Tensor properties are defined by human-readable configs, allowing users to understand the compression format at a glance.

## Installation

### Pip

```bash
pip install compressed-tensors
```

### From source

```bash
git clone https://github.com/neuralmagic/compressed-tensors
cd compressed-tensors
pip install -e .
```

## Getting started

### Saving

The function `save_compressed` returns a `compression_config` dict if compression was applied and `None` otherwise; it can be used to inspect the applied compression.

```python
from typing import Dict

from compressed_tensors import save_compressed
from torch import Tensor

tensors: Dict[str, Tensor] = ...
compression_config: Dict = save_compressed(tensors, "model.safetensors")
```
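
If you already know how the tensors should be compressed, the config can also be passed explicitly. A minimal sketch, assuming a config class is registered under the `"sparse_bitmask"` format (see the `config.json` example later in this README) and accepts a `format` field:

```python
import torch

from compressed_tensors import save_compressed
from compressed_tensors.config import CompressionConfig

# hypothetical layer name; mostly-zero weights are what bitmask compression targets
tensors = {"layer.weight": torch.zeros(512, 512)}

# assumption: "sparse_bitmask" is a registered CompressionConfig format
config = CompressionConfig.load_from_registry("sparse_bitmask", format="sparse_bitmask")
compression_config = save_compressed(
    tensors, "model.safetensors", compression_config=config
)
```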

### Loading

```python
from typing import Dict

from compressed_tensors import load_compressed
from torch import Tensor

tensors: Dict[str, Tensor] = load_compressed("model.safetensors", device="cpu")
```

## Benefits
TODO

## SafeTensors File Format

For each parameter in the uncompressed state_dict, we store the following attributes needed for decompression in the compressed state_dict:

- Compressed tensor
- Bitmask
- Uncompressed shape
- Row offsets

```python
# Dense
{
    PARAM_NAME: uncompressed_tensor
}

# Compressed
{
    PARAM_NAME.compressed: compressed_tensor,  # 1d tensor
    PARAM_NAME.bitmask: value,  # 2d bitmask tensor (nrows x ceil(ncols / 8))
    PARAM_NAME.shape: value,  # uncompressed shape tensor
    PARAM_NAME.row_offsets: value  # 1d offsets tensor
}
```
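
To make the bitmask layout concrete, here is a small illustrative sketch of packing a single row; it mirrors the layout above but is not the library's internal code, and the bit order within each byte is an assumption:

```python
import numpy as np
import torch

# one 1 x 8 row with three non-zero entries
dense = torch.tensor([[0.0, 1.5, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0]])

mask = (dense != 0).numpy()                             # boolean mask of non-zeros
bitmask = torch.from_numpy(np.packbits(mask, axis=-1))  # 1 x ceil(8 / 8) uint8 tensor
compressed = dense[dense != 0]                          # 1d values: [1.5, 2.0, 3.0]
shape = torch.tensor(dense.shape)                       # uncompressed shape: [1, 8]
row_offsets = torch.tensor([0])                         # row i starts at row_offsets[i]
```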

The library provides pathways to automatically add the config information to the HF config file.

```json
// config.json
{
  "sparsity_config": {
    "format": "sparse_bitmask", // "dense_sparsity" for the original tensor format

    // Informational
    "sparsity_structure": "unstructured", // Or 2:4, 8:16, etc.
    "global_sparsity": "0.5"
  }
}
```
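
A hedged sketch of the pathway this enables, using `infer_compressor_from_model_config` from the helpers changed below; the model directory path is hypothetical:

```python
from compressed_tensors.utils.helpers import infer_compressor_from_model_config

# assumption: "path/to/model" is a Hugging Face model directory whose
# config.json carries the compression entry shown above
compressor = infer_compressor_from_model_config("path/to/model")
if compressor is not None:
    # stream the dense weights back out of the compressed files
    dense_state_dict = dict(compressor.decompress("path/to/model"))
```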
2 changes: 1 addition & 1 deletion src/compressed_tensors/base.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SPARSITY_CONFIG_NAME = "sparsity_config"
+CONFIG_NAME = "compression_config"
16 changes: 10 additions & 6 deletions src/compressed_tensors/compressors/base.py
@@ -15,7 +15,7 @@
 import operator
 from typing import Dict, Generator, Tuple
 
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
+from compressed_tensors.base import CONFIG_NAME
 from compressed_tensors.config import CompressionConfig
 from compressed_tensors.registry import RegistryMixin
 from torch import Tensor
@@ -45,12 +45,16 @@ def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
         """
         raise NotImplementedError()
 
-    def decompress(self, model_path: str) -> Generator[Tuple[str, Tensor], None, None]:
+    def decompress(
+        self, path_to_model_or_tensors: str
+    ) -> Generator[Tuple[str, Tensor], None, None]:
         """
-        Reads a compressed state dict located at model_path and returns a
-        generator for sequentially decompressing back to a dense state dict
+        Reads a compressed state dict located at path_to_model_or_tensors
+        and returns a generator for sequentially decompressing back to a
+        dense state dict
 
-        :param model_path: path to compressed safetensors model
+        :param path_to_model_or_tensors: path to compressed safetensors model
+            (directory with one or more safetensors files) or compressed tensors file
         :return: generator for the dense state dict
         """
         raise NotImplementedError()
@@ -70,4 +74,4 @@ def overwrite_weights(self, model_path: str, model: Module):
             data_old = operator.attrgetter(name)(model)
             data_old.data = data_new.data
 
-        setattr(model, SPARSITY_CONFIG_NAME, self.config)
+        setattr(model, CONFIG_NAME, self.config)
4 changes: 3 additions & 1 deletion src/compressed_tensors/compressors/dense.py
@@ -27,5 +27,7 @@ class DenseCompressor(ModelCompressor):
     def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
         return model_state
 
-    def decompress(self, model_path: str) -> Generator[Tuple[str, Tensor], None, None]:
+    def decompress(
+        self, path_to_model_or_tensors: str
+    ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
16 changes: 10 additions & 6 deletions src/compressed_tensors/compressors/sparse_bitmask.py
@@ -70,22 +70,26 @@ def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
 
         return compressed_dict
 
-    def decompress(self, model_path: str) -> Generator[Tuple[str, Tensor], None, None]:
+    def decompress(
+        self, path_to_model_or_tensors: str, device: str = "cpu"
+    ) -> Generator[Tuple[str, Tensor], None, None]:
         """
-        Reads a bitmask compressed state dict located at model_path and returns a
-        generator for sequentially decompressing back to a dense state dict
+        Reads a bitmask compressed state dict located at path_to_model_or_tensors
+        and returns a generator for sequentially decompressing back to a dense state dict
 
-        :param model_path: path to compressed safetensors model
+        :param path_to_model_or_tensors: path to compressed safetensors model
+            (directory with one or more safetensors files) or compressed tensors file
+        :param device: device to load decompressed weights onto
         :return: iterator for generating decompressed weights
         """
         weight_mappings = get_nested_weight_mappings(
-            model_path, self.COMPRESSION_PARAM_NAMES
+            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
         )
         for weight_name in weight_mappings.keys():
             weight_data = {}
             for param_name, safe_path in weight_mappings[weight_name].items():
                 full_name = merge_names(weight_name, param_name)
-                with safe_open(safe_path, framework="pt", device="cpu") as f:
+                with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
             data = BitmaskTensor(**weight_data)
             decompressed = data.decompress()
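
A hedged usage sketch of the updated signature; the registry name mirrors the `"sparse_bitmask"` format above, the config construction is an assumption, and the file path is hypothetical:

```python
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.config import CompressionConfig

# assumption: "sparse_bitmask" is the registered format name, as in config.json
config = CompressionConfig.load_from_registry("sparse_bitmask", format="sparse_bitmask")
compressor = ModelCompressor.load_from_registry("sparse_bitmask", config=config)

# stream dense weights one at a time instead of materializing all of them at once
for name, dense_tensor in compressor.decompress("model.safetensors", device="cpu"):
    print(name, tuple(dense_tensor.shape))
```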
92 changes: 87 additions & 5 deletions src/compressed_tensors/utils/helpers.py
@@ -12,16 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from pathlib import Path
+from typing import Dict, Optional, Union
 
-from typing import Optional
-
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
+from compressed_tensors.base import CONFIG_NAME
 from compressed_tensors.compressors import ModelCompressor
 from compressed_tensors.config import CompressionConfig
+from safetensors import safe_open
+from safetensors.torch import save_file
 from torch import Tensor
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config"]
+__all__ = ["infer_compressor_from_model_config", "load_compressed", "save_compressed"]
 
 
 def infer_compressor_from_model_config(
@@ -35,11 +38,90 @@ def infer_compressor_from_model_config(
     :return: matching compressor if config contains a sparsity config
     """
     config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
+    sparsity_config = getattr(config, CONFIG_NAME, None)
     if sparsity_config is None:
         return None
 
     format = sparsity_config.get("format")
     sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
     compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
     return compressor
+
+
+def save_compressed(
+    tensors: Dict[str, Tensor],
+    save_path: Union[str, Path],
+    compression_config: Optional[CompressionConfig] = None,
+) -> Optional[Dict]:
+    """
+    Save compressed tensors to disk. If tensors are not compressed,
+    save them as is.
+
+    :param tensors: dictionary of tensors to compress
+    :param save_path: path to save compressed tensors
+    :param compression_config: compression config to use for compressing tensors.
+        Can be either inferred from tensors or provided explicitly
+    :return: compression config as a dict if tensors were compressed,
+        None otherwise
+    """
+    if tensors is None or len(tensors) == 0:
+        raise ValueError("No tensors or empty tensors provided to compress")
+
+    # create compression config if not provided
+    # TODO: Not implemented, need to get this in ASAP
+    # compression_config = compression_config or infer_compression_config(tensors)
+
+    if compression_config is None:
+        # no compression applied
+        save_file(tensors, save_path)
+        return None
+
+    # compress
+    compression_format = compression_config.format
+    compressor = ModelCompressor.load_from_registry(
+        compression_format, config=compression_config
+    )
+    # save compressed tensors
+    compressed_tensors = compressor.compress(tensors)
+    save_file(compressed_tensors, save_path)
+
+    # return compression_config as dict
+    return {CONFIG_NAME: compression_config.model_dump(exclude_unset=True)}
+
+
+def load_compressed(
+    compressed_tensors: Union[str, Path],
+    compression_config: Optional[CompressionConfig] = None,
+    device: Optional[str] = "cpu",
+) -> Dict[str, Tensor]:
+    """
+    Load compressed tensors from disk. If tensors are not compressed,
+    load them as is.
+
+    :param compressed_tensors: path to compressed tensors
+    :param compression_config: compression config to use for decompressing tensors.
+        Can be either inferred from tensors or provided explicitly.
+    :param device: device to move tensors to. If None, tensors are loaded on CPU.
+    :return: decompressed tensors
+    """
+    if compressed_tensors is None or not Path(compressed_tensors).exists():
+        raise ValueError("No compressed tensors provided to load")
+
+    # create compression config if not provided
+    # TODO: Not implemented, need to get this in ASAP
+    # compression_config = compression_config or infer_compression_config(tensors)
+
+    if compression_config is None:
+        # no compression applied
+        tensors = {}
+        with safe_open(compressed_tensors, framework="pt", device=device) as f:
+            for key in f.keys():
+                tensors[key] = f.get_tensor(key)
+        return tensors
+
+    # decompress
+    compression_format = compression_config.format
+    compressor = ModelCompressor.load_from_registry(
+        compression_format, config=compression_config
+    )
+    return dict(compressor.decompress(compressed_tensors))
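
A minimal round-trip sketch of the no-compression path documented above (no config on save means a plain safetensors file; no config on load means a plain read):

```python
import torch

from compressed_tensors import load_compressed, save_compressed

tensors = {"embedding.weight": torch.zeros(16, 16)}  # hypothetical param name

# without a compression_config, tensors are saved and loaded as-is
assert save_compressed(tensors, "model.safetensors") is None
loaded = load_compressed("model.safetensors", device="cpu")
assert torch.equal(loaded["embedding.weight"], tensors["embedding.weight"])
```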
51 changes: 31 additions & 20 deletions src/compressed_tensors/utils/safetensors_load.py
@@ -117,7 +117,7 @@ def merge_names(parent_name: str, child_name: str) -> str:
     return parent_name + "." + child_name
 
 
-def get_weight_mappings(model_path: str) -> Dict[str, str]:
+def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
     """
     Takes a path to a state dict saved in safetensors format and returns a mapping
     from parameterized layer name to file location.
@@ -131,31 +131,42 @@ def get_weight_mappings(model_path: str) -> Dict[str, str]:
 
     This generalizes to cases where the model is split into multiple safetensors files
 
-    :param model_path: path to safetensors state dict, must contain either a single
-        safetensors file or multiple files with an index
+    :param path_to_model_or_tensors: path to a directory that contains safetensors
+        (must contain either a single file or multiple files with an index),
+        or a path to a single safetensors file
     :return: mapping of parameterized layer name to file location
     """
-    safetensors_path = os.path.join(model_path, SAFE_WEIGHTS_NAME)
-    index_path = os.path.join(model_path, SAFE_WEIGHTS_INDEX_NAME)
-    if os.path.exists(safetensors_path):
+
+    if os.path.isfile(path_to_model_or_tensors):
         # we have a single safetensors file to read
-        header = get_safetensors_header(safetensors_path)
+        header = get_safetensors_header(path_to_model_or_tensors)
         for key in header.keys():
-            header[key] = SAFE_WEIGHTS_NAME
+            header[key] = path_to_model_or_tensors
         header.pop("__metadata__", None)
-    elif os.path.exists(index_path):
-        # we have multiple safetensors file, read from index
-        with open(index_path, "r", encoding="utf-8") as f:
-            index = json.load(f)
-        header = index["weight_map"]
     else:
-        raise ValueError(
-            f"Could not find a safetensors weight or index file at {model_path}"
-        )
-
-    # convert weight locations to full paths
-    for key, value in header.items():
-        header[key] = os.path.join(model_path, value)
+        # we have a directory with multiple safetensors files
+        safetensors_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_NAME)
+        index_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_INDEX_NAME)
+        if os.path.exists(safetensors_path):
+            # we have a single safetensors file to read
+            header = get_safetensors_header(safetensors_path)
+            for key in header.keys():
+                header[key] = SAFE_WEIGHTS_NAME
+            header.pop("__metadata__", None)
+        elif os.path.exists(index_path):
+            # we have multiple safetensors files, read from the index
+            with open(index_path, "r", encoding="utf-8") as f:
+                index = json.load(f)
+            header = index["weight_map"]
+        else:
+            raise ValueError(
+                "Could not find a safetensors weight "
+                f"or index file at {path_to_model_or_tensors}"
+            )
+
+        # convert weight locations to full paths
+        for key, value in header.items():
+            header[key] = os.path.join(path_to_model_or_tensors, value)
 
     return header
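
A brief hedged sketch of calling the updated helper; the path is hypothetical, and the helper accepts either a model directory or a single safetensors file:

```python
from compressed_tensors.utils.safetensors_load import get_weight_mappings

# resolve where each parameter lives on disk
mappings = get_weight_mappings("model.safetensors")
for param_name, file_path in mappings.items():
    print(param_name, "->", file_path)
```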