All modules for which code is available
- sparseml.keras.datasets.classification.imagefolder
- sparseml.keras.datasets.classification.imagenet
- sparseml.keras.datasets.classification.imagenette
- sparseml.keras.datasets.dataset
- sparseml.keras.datasets.helpers
- sparseml.keras.datasets.registry
- sparseml.keras.models.classification.resnet
- sparseml.keras.models.registry
- sparseml.keras.optim.manager
- sparseml.keras.optim.mask_pruning
- sparseml.keras.optim.mask_pruning_creator
- sparseml.keras.optim.modifier
- sparseml.keras.optim.modifier_pruning
- sparseml.keras.optim.utils
- sparseml.keras.utils.callbacks
- sparseml.keras.utils.compat
- sparseml.keras.utils.exporter
- sparseml.keras.utils.logger
- sparseml.keras.utils.model
- sparseml.pytorch.optim.modifier_quantization
- sparseml.pytorch.optim.modifier_regularizer
- sparseml.pytorch.optim.optimizer
- sparseml.pytorch.optim.sensitivity_as
- sparseml.pytorch.optim.sensitivity_lr
- sparseml.pytorch.optim.sensitivity_pruning
- sparseml.pytorch.utils.benchmarker
- sparseml.pytorch.utils.callbacks
- sparseml.pytorch.utils.exporter
- sparseml.pytorch.utils.helpers
- sparseml.pytorch.utils.logger
- sparseml.pytorch.utils.loss
- sparseml.pytorch.utils.model
- sparseml.pytorch.utils.module
- sparseml.pytorch.utils.quantization.helpers
- sparseml.pytorch.utils.quantization.quantize_qat_export
- sparseml.pytorch.utils.ssd_helpers
- sparseml.pytorch.utils.yolo_helpers
- sparseml.tensorflow_v1.datasets.classification.cifar
Source code for sparseml.keras.datasets.classification.imagefolder
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+ImageFolder dataset implementations for the image
+classification field in computer vision.
+"""
+
+import glob
+import os
+import random
+from typing import Callable, Iterable, NamedTuple, Tuple, Union
+
+import numpy
+import tensorflow
+
+from sparseml.keras.datasets.dataset import Dataset
+from sparseml.keras.datasets.helpers import random_scaling_crop
+from sparseml.keras.datasets.registry import DatasetRegistry
+from sparseml.keras.utils.compat import keras
+from sparseml.utils import clean_path
+from sparseml.utils.datasets import IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS
+
+
+__all__ = ["imagenet_normalizer", "ImageFolderDataset", "SplitsTransforms"]
+
+
+SplitsTransforms = NamedTuple(
+ "SplitsTransforms",
+ [
+ ("train", Union[Iterable[Callable], None]),
+ ("val", Union[Iterable[Callable], None]),
+ ],
+)
+
+
+def imagenet_normalizer(img: tensorflow.Tensor, mode: str):
+ """
+ Normalize an image using mean and std of the imagenet dataset
+ :param img: The input image to normalize
+ :param mode: one of "tf", "caffe", or "torch"
+ :return: The normalized image
+ """
+ if mode == "tf":
+ preprocess_input = keras.applications.mobilenet.preprocess_input
+ elif mode == "caffe":
+ preprocess_input = keras.applications.resnet.preprocess_input
+ elif mode == "torch":
+ preprocess_input = None
+ else:
+ raise ValueError("Unknown preprocessing method")
+ if preprocess_input is not None:
+ processed_image = preprocess_input(img)
+ else:
+ res = tensorflow.cast(img, dtype=tensorflow.float32) / 255.0
+ means = tensorflow.constant(IMAGENET_RGB_MEANS, dtype=tensorflow.float32)
+ stds = tensorflow.constant(IMAGENET_RGB_STDS, dtype=tensorflow.float32)
+ processed_image = (res - means) / stds
+ return processed_image
+
+
+def default_imagenet_normalizer():
+ def normalizer(img: tensorflow.Tensor):
+ # Default to the same preprocessing used by Keras Applications ResNet
+ return imagenet_normalizer(img, "caffe")
+
+ return normalizer
+
+
+@DatasetRegistry.register(
+ key=["imagefolder"],
+ attributes={
+ "transform_means": IMAGENET_RGB_MEANS,
+ "transform_stds": IMAGENET_RGB_STDS,
+ },
+)
+class ImageFolderDataset(Dataset):
+ """
+ Implementation for loading an image folder structure into a dataset.
+ | Image folders should be of the form:
+ | root/class_x/xxx.ext
+ | root/class_x/xxy.ext
+ | root/class_x/xxz.ext
+ |
+ | root/class_y/123.ext
+ | root/class_y/nsdf3.ext
+ | root/class_y/asd932_.ext
+ :param root: the root location for the dataset's images to load
+ :param train: True to load the training dataset from the root,
+ False for validation
+ :param image_size: the size of the image to reshape to
+ :param pre_resize_transforms: transforms to be applied before resizing the image
+ :param post_resize_transforms: transforms to be applied after resizing the image
+ """
+
+ def __init__(
+ self,
+ root: str,
+ train: bool,
+ image_size: Union[None, int, Tuple[int, int]] = 224,
+ pre_resize_transforms: Union[SplitsTransforms, None] = SplitsTransforms(
+ train=(
+ random_scaling_crop(),
+ tensorflow.image.random_flip_left_right,
+ ),
+ val=None,
+ ),
+ post_resize_transforms: Union[SplitsTransforms, None] = SplitsTransforms(
+ train=(default_imagenet_normalizer(),),
+ val=(default_imagenet_normalizer(),),
+ ),
+ ):
+ self._root = os.path.join(clean_path(root), "train" if train else "val")
+ if not os.path.exists(self._root):
+ raise ValueError("Data set folder {} must exist".format(self._root))
+ self._train = train
+ if image_size is not None:
+ self._image_size = (
+ image_size
+ if isinstance(image_size, tuple)
+ else (image_size, image_size)
+ )
+ else:
+ self._image_size = None
+ self._pre_resize_transforms = pre_resize_transforms
+ self._post_resize_transforms = post_resize_transforms
+
+ self._num_images = len(
+ [None for _ in glob.glob(os.path.join(self._root, "*", "*"))]
+ )
+ self._num_classes = len(
+ [None for _ in glob.glob(os.path.join(self._root, "*", ""))]
+ )
+
+ def __len__(self):
+ return self._num_images
+
+ @property
+ def root(self) -> str:
+ """
+ :return: the root location for the dataset's images to load
+ """
+ return self._root
+
+ @property
+ def train(self) -> bool:
+ """
+ :return: True to load the training dataset from the root, False for validation
+ """
+ return self._train
+
+ @property
+ def image_size(self) -> Tuple[int, int]:
+ """
+ :return: the size of the images to resize to
+ """
+ return self._image_size
+
+ @property
+ def pre_resize_transforms(self) -> SplitsTransforms:
+ """
+ :return: transforms to be applied before resizing the image
+ """
+ return self._pre_resize_transforms
+
+ @property
+ def post_resize_transforms(self) -> SplitsTransforms:
+ """
+ :return: transforms to be applied after resizing the image
+ """
+ return self._post_resize_transforms
+
+ @property
+ def num_images(self) -> int:
+ """
+ :return: the number of images found for the dataset
+ """
+ return self._num_images
+
+ @property
+ def num_classes(self):
+ """
+ :return: the number of classes found for the dataset
+ """
+ return self._num_classes
+
+ def processor(self, file_path: tensorflow.Tensor, label: tensorflow.Tensor):
+ """
+ :param file_path: the path to the file to load an image from
+ :param label: the label for the given image
+ :return: a tuple containing the processed image and label
+ """
+ img = tensorflow.io.read_file(file_path)
+ img = tensorflow.image.decode_jpeg(img, channels=3)
+ if self.pre_resize_transforms:
+ transforms = (
+ self.pre_resize_transforms.train
+ if self.train
+ else self.pre_resize_transforms.val
+ )
+ if transforms:
+ for trans in transforms:
+ img = trans(img)
+ if self._image_size is not None:
+ img = tensorflow.image.resize(img, self.image_size)
+
+ if self.post_resize_transforms:
+ transforms = (
+ self.post_resize_transforms.train
+ if self.train
+ else self.post_resize_transforms.val
+ )
+ if transforms:
+ for trans in transforms:
+ img = trans(img)
+ return img, label
+
+ def creator(self):
+ """
+ :return: a created dataset that gives the file_path and label for each
+ image under self.root
+ """
+ labels_strs = [
+ fold.split(os.path.sep)[-1]
+ for fold in glob.glob(os.path.join(self.root, "*"))
+ ]
+ labels_strs.sort()
+ labels_dict = {
+ lab: numpy.identity(len(labels_strs))[index].tolist()
+ for index, lab in enumerate(labels_strs)
+ }
+ files_labels = [
+ (file, labels_dict[file.split(os.path.sep)[-2]])
+ for file in glob.glob(os.path.join(self.root, "*", "*"))
+ ]
+ random.Random(42).shuffle(files_labels)
+ files, labels = zip(*files_labels)
+ files = tensorflow.constant(files)
+ labels = tensorflow.constant(labels)
+
+ return tensorflow.data.Dataset.from_tensor_slices((files, labels))
+
Source code for sparseml.keras.datasets.classification.imagenet
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Imagenet dataset implementations for the image classification field in computer vision.
+More info for the dataset can be found `here <http://www.image-net.org/>`__.
+"""
+
+import random
+from typing import Tuple, Union
+
+import tensorflow as tf
+
+from sparseml.keras.datasets.classification import (
+ ImageFolderDataset,
+ SplitsTransforms,
+ imagenet_normalizer,
+)
+from sparseml.keras.datasets.helpers import random_scaling_crop
+from sparseml.keras.datasets.registry import DatasetRegistry
+from sparseml.keras.utils import keras
+from sparseml.utils import clean_path
+from sparseml.utils.datasets import (
+ IMAGENET_RGB_MEANS,
+ IMAGENET_RGB_STDS,
+ default_dataset_path,
+)
+
+
+__all__ = ["ImageNetDataset"]
+
+
+def torch_imagenet_normalizer():
+ def normalizer(image: tf.Tensor):
+ return imagenet_normalizer(image, "torch")
+
+ return normalizer
+
+
+def imagenet_pre_resize_processor():
+ def processor(image: tf.Tensor):
+ image_batch = tf.expand_dims(image, axis=0)
+
+ # Resize the image the following way to match torchvision's Resize
+ # transform used by Pytorch code path for Imagenet:
+ # torchvision.transforms.Resize(256)
+ # which resizes the smaller side of images to 256 and the other one based
+ # on the aspect ratio
+ shape = tf.shape(image)
+ h, w = shape[0], shape[1]
+ if h > w:
+ new_h, new_w = tf.cast(256 * h / w, dtype=tf.uint16), tf.constant(
+ 256, dtype=tf.uint16
+ )
+ else:
+ new_h, new_w = tf.constant(256, dtype=tf.uint16), tf.cast(
+ 256 * w / h, dtype=tf.uint16
+ )
+ resizer = keras.layers.experimental.preprocessing.Resizing(new_h, new_w)
+ image_batch = tf.cast(resizer(image_batch), dtype=tf.uint8)
+
+ # Center crop
+ center_cropper = keras.layers.experimental.preprocessing.CenterCrop(224, 224)
+ image_batch = tf.cast(center_cropper(image_batch), dtype=tf.uint8)
+
+ return image_batch[0, :]
+
+ return processor
+
+
+@DatasetRegistry.register(
+ key=["imagenet"],
+ attributes={
+ "num_classes": 1000,
+ "transform_means": IMAGENET_RGB_MEANS,
+ "transform_stds": IMAGENET_RGB_STDS,
+ },
+)
+class ImageNetDataset(ImageFolderDataset):
+ """
+ Wrapper for the ImageNet dataset to apply standard transforms.
+
+ :param root: The root folder to find the dataset at
+ :param train: True if this is for the training distribution,
+ False for the validation
+ :param rand_trans: True to apply RandomCrop and RandomHorizontalFlip to the data,
+ False otherwise
+ :param image_size: the size of the image to output from the dataset
+ """
+
+ def __init__(
+ self,
+ root: str = default_dataset_path("imagenet"),
+ train: bool = True,
+ rand_trans: bool = False,
+ image_size: Union[None, int, Tuple[int, int]] = 224,
+ pre_resize_transforms=SplitsTransforms(
+ train=(
+ random_scaling_crop(),
+ tf.image.random_flip_left_right,
+ ),
+ val=(imagenet_pre_resize_processor(),),
+ ),
+ post_resize_transforms=SplitsTransforms(
+ train=(torch_imagenet_normalizer(),), val=(torch_imagenet_normalizer(),)
+ ),
+ ):
+ root = clean_path(root)
+ super().__init__(
+ root,
+ train,
+ image_size=image_size,
+ pre_resize_transforms=pre_resize_transforms,
+ post_resize_transforms=post_resize_transforms,
+ )
+
+ if train:
+ # make sure we don't preserve the folder structure class order
+ random.shuffle(self.samples)
+
Source code for sparseml.keras.datasets.classification.imagenette
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Imagenette and Imagewoof dataset implementations for the image classification field in
+computer vision.
+More info for the dataset can be found `here <https://github.com/fastai/imagenette>`__.
+"""
+
+from typing import Union
+
+from sparseml.keras.datasets.classification.imagefolder import ImageFolderDataset
+from sparseml.keras.datasets.registry import DatasetRegistry
+from sparseml.utils.datasets import (
+ IMAGENET_RGB_MEANS,
+ IMAGENET_RGB_STDS,
+ ImagenetteDownloader,
+ ImagenetteSize,
+ default_dataset_path,
+)
+
+
+__all__ = ["ImagenetteDataset"]
+
+
+@DatasetRegistry.register(
+ key=["imagenette"],
+ attributes={
+ "num_classes": 10,
+ "transform_means": IMAGENET_RGB_MEANS,
+ "transform_stds": IMAGENET_RGB_STDS,
+ },
+)
+class ImagenetteDataset(ImageFolderDataset, ImagenetteDownloader):
+ """
+ Wrapper for the imagenette (10 class) dataset that fastai created.
+ Handles downloading and applying standard transforms.
+ :param root: The root folder to find the dataset at,
+ if not found will download here if download=True
+ :param train: True if this is for the training distribution,
+ False for the validation
+ :param dataset_size: The size of the dataset to use and download:
+ See ImagenetteSize for options
+ :param image_size: The image size to output from the dataset
+ :param download: True to download the dataset, False otherwise
+ """
+
+ def __init__(
+ self,
+ root: str = default_dataset_path("imagenette"),
+ train: bool = True,
+ dataset_size: ImagenetteSize = ImagenetteSize.s320,
+ image_size: Union[int, None] = None,
+ download: bool = True,
+ ):
+ ImagenetteDownloader.__init__(self, root, dataset_size, download)
+ self._train = train
+
+ if image_size is None:
+ if dataset_size == ImagenetteSize.s160:
+ image_size = 160
+ elif dataset_size == ImagenetteSize.s320:
+ image_size = 320
+ else:
+ image_size = 224
+
+ super().__init__(self.extracted_root, train, image_size)
+
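
A usage sketch for the ImagenetteDataset above; with download=True the fastai Imagenette archive for the chosen size is downloaded and extracted under the given root (the path below is hypothetical), and image_size defaults to 320 for ImagenetteSize.s320.

from sparseml.keras.datasets.classification.imagenette import ImagenetteDataset
from sparseml.utils.datasets import ImagenetteSize

train_data = ImagenetteDataset(
    root="/data/imagenette",  # hypothetical download/extract location
    train=True,
    dataset_size=ImagenetteSize.s320,
    download=True,
)
batches = train_data.build(batch_size=32, shuffle_buffer_size=1024)
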
Source code for sparseml.keras.datasets.dataset
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+General dataset implementations for Keras
+"""
+
+from abc import ABCMeta, abstractmethod
+
+import tensorflow
+
+
+__all__ = [
+ "Dataset",
+]
+
+
+class Dataset(metaclass=ABCMeta):
+ """
+ Generic dataset implementation for Keras.
+ Expected to work with the tensorflow.data APIs
+ """
+
+ @abstractmethod
+ def __len__(self):
+ raise NotImplementedError()
+
+ def build(
+ self,
+ batch_size: int,
+ repeat_count: int = None,
+ shuffle_buffer_size: int = None,
+ prefetch_buffer_size: int = None,
+ num_parallel_calls: int = None,
+ ) -> tensorflow.data.Dataset:
+ """
+ Create the dataset in the current graph using tensorflow.data APIs
+ :param batch_size: the batch size to create the dataset for
+ :param repeat_count: the number of times to repeat the dataset,
+ if unset or None, will repeat indefinitely
+ :param shuffle_buffer_size: None if not shuffling,
+ otherwise the size of the buffer to use for shuffling data
+ :param prefetch_buffer_size: None if not prefetching,
+ otherwise the size of the buffer to use for prefetching
+ :param num_parallel_calls: the number of parallel calls to run the
+ processor function with
+ :return: a tensorflow.data.Dataset instance
+ """
+ dataset = self.creator()
+
+ if shuffle_buffer_size and shuffle_buffer_size > 0:
+ dataset = dataset.shuffle(
+ shuffle_buffer_size, reshuffle_each_iteration=True
+ )
+
+ dataset = dataset.map(self.processor, num_parallel_calls=num_parallel_calls)
+
+ # Together with shuffling above, putting batch after repeat yields
+ # batches that straddle epoch boundaries
+ dataset = dataset.repeat(repeat_count)
+ dataset = dataset.batch(batch_size)
+
+ if prefetch_buffer_size and prefetch_buffer_size > 0:
+ dataset = dataset.prefetch(prefetch_buffer_size)
+
+ return dataset
+
+ @abstractmethod
+ def creator(self) -> tensorflow.data.Dataset:
+ """
+ Implemented by subclasses to create a tensorflow.data dataset for the given implementation.
+ :return: a created tensorflow.data dataset
+ """
+ raise NotImplementedError()
+
+ @abstractmethod
+ def processor(self, *args, **kwargs):
+ """
+ Implemented by subclasses to parallelize and map processing functions
+ for loading the data of the dataset into memory.
+ :param args: generic inputs for processing
+ :param kwargs: generic inputs for processing
+ :return: the processed tensors
+ """
+ raise NotImplementedError()
+
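
To make the abstract contract above concrete, here is a minimal hypothetical subclass: creator() supplies the raw tensorflow.data source and processor() maps each element, which is exactly how build() wires them together.

import tensorflow

from sparseml.keras.datasets.dataset import Dataset


class ListDataset(Dataset):
    """Hypothetical example wrapping an in-memory list of (image path, label) pairs."""

    def __init__(self, files, labels):
        self._files = files
        self._labels = labels

    def __len__(self):
        return len(self._files)

    def creator(self) -> tensorflow.data.Dataset:
        # raw source of (file_path, label) elements
        return tensorflow.data.Dataset.from_tensor_slices((self._files, self._labels))

    def processor(self, file_path, label):
        # decode and resize each image as it is mapped by build()
        img = tensorflow.io.read_file(file_path)
        img = tensorflow.image.decode_jpeg(img, channels=3)
        return tensorflow.image.resize(img, (224, 224)), label
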
Source code for sparseml.keras.datasets.helpers
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+General utilities for dataset implementations for Keras
+"""
+
+from typing import Tuple
+
+import tensorflow
+
+
+__all__ = [
+ "random_scaling_crop",
+]
+
+
+def random_scaling_crop(
+ scale_range: Tuple[int, int] = (0.8, 1.0),
+ ratio_range: Tuple[int, int] = (3.0 / 4.0, 4.0 / 3.0),
+):
+ """
+ Random crop implementation which also randomly scales the crop taken
+ as well as the aspect ratio of the crop.
+ :param scale_range: the (min, max) of the crop scales to take from the orig image
+ :param ratio_range: the (min, max) of the aspect ratios to take from the orig image
+ :return: the callable function for random scaling crop op,
+ takes in the image and outputs randomly cropped image
+ """
+
+ def rand_crop(img: tensorflow.Tensor):
+ orig_shape = tensorflow.shape(img)
+ scale = tensorflow.random.uniform(
+ shape=[1], minval=scale_range[0], maxval=scale_range[1]
+ )[0]
+ ratio = tensorflow.random.uniform(
+ shape=[1], minval=ratio_range[0], maxval=ratio_range[1]
+ )[0]
+ height = tensorflow.minimum(
+ tensorflow.cast(
+ tensorflow.round(
+ tensorflow.cast(orig_shape[0], dtype=tensorflow.float32)
+ * scale
+ / ratio
+ ),
+ tensorflow.int32,
+ ),
+ orig_shape[0],
+ )
+ width = tensorflow.minimum(
+ tensorflow.cast(
+ tensorflow.round(
+ tensorflow.cast(orig_shape[1], dtype=tensorflow.float32) * scale
+ ),
+ tensorflow.int32,
+ ),
+ orig_shape[1],
+ )
+ img = tensorflow.image.random_crop(img, [height, width, orig_shape[2]])
+
+ return img
+
+ return rand_crop
+
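
A quick sketch of using random_scaling_crop on its own: it returns a callable, so it is created once and then applied to (or mapped over) image tensors.

import tensorflow

from sparseml.keras.datasets.helpers import random_scaling_crop

crop = random_scaling_crop(scale_range=(0.8, 1.0), ratio_range=(3.0 / 4.0, 4.0 / 3.0))
image = tensorflow.random.uniform((256, 256, 3))  # stand-in for a decoded image
cropped = crop(image)  # randomly scaled and cropped, still a rank-3 tensor
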
Source code for sparseml.keras.datasets.registry
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Code related to the Keras dataset registry for easily creating datasets.
+"""
+
+from typing import Any, Dict, List, Union
+
+
+__all__ = ["DatasetRegistry"]
+
+
+class DatasetRegistry(object):
+ """
+ Registry class for creating datasets
+ """
+
+ _CONSTRUCTORS = {}
+ _ATTRIBUTES = {}
+
+ @staticmethod
+ def create(key: str, *args, **kwargs):
+ """
+ Create a new dataset for the given key
+ :param key: the dataset key (name) to create
+ :return: the instantiated dataset
+ """
+ if key not in DatasetRegistry._CONSTRUCTORS:
+ raise ValueError(
+ "key {} is not in the model registry; available: {}".format(
+ key, DatasetRegistry._CONSTRUCTORS.keys()
+ )
+ )
+
+ return DatasetRegistry._CONSTRUCTORS[key](*args, **kwargs)
+
+ @staticmethod
+ def attributes(key: str) -> Dict[str, Any]:
+ """
+ :param key: the dataset key (name) to create
+ :return: the specified attributes for the dataset
+ """
+ if key not in DatasetRegistry._CONSTRUCTORS:
+ raise ValueError(
+ "key {} is not in the model registry; available: {}".format(
+ key, DatasetRegistry._CONSTRUCTORS.keys()
+ )
+ )
+
+ return DatasetRegistry._ATTRIBUTES[key]
+
+ @staticmethod
+ def register(key: Union[str, List[str]], attributes: Dict[str, Any]):
+ """
+ Register a dataset with the registry. Should be used as a decorator
+ :param key: the dataset key (name) to register
+ :param attributes: the specified attributes for the dataset
+ :return: the decorator
+ """
+ if not isinstance(key, List):
+ key = [key]
+
+ def decorator(const_func):
+ for r_key in key:
+ if r_key in DatasetRegistry._CONSTRUCTORS:
+ raise ValueError("key {} is already registered".format(key))
+
+ DatasetRegistry._CONSTRUCTORS[r_key] = const_func
+ DatasetRegistry._ATTRIBUTES[r_key] = attributes
+
+ return const_func
+
+ return decorator
+
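
A sketch of the registration flow above using a hypothetical dataset key; register() decorates the constructor, after which create() and attributes() resolve it by key.

from sparseml.keras.datasets.registry import DatasetRegistry


@DatasetRegistry.register(key=["my_images"], attributes={"num_classes": 2})
class MyImagesDataset:  # hypothetical dataset class
    def __init__(self, root: str, train: bool = True):
        self.root = root
        self.train = train


dataset = DatasetRegistry.create("my_images", root="/data/my_images", train=True)
print(DatasetRegistry.attributes("my_images"))  # {'num_classes': 2}
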
Source code for sparseml.keras.models.classification.resnet
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Keras ResNet implementation.
+Further info on ResNet can be found in the paper
+`here <https://arxiv.org/abs/1512.03385>`__.
+"""
+
+from typing import List, Union
+
+import tensorflow
+from tensorflow.keras import backend as K
+from tensorflow.keras import layers
+from tensorflow.keras.models import Model
+
+from sparseml.keras.models.registry import ModelRegistry
+from sparseml.keras.utils import keras
+
+
+__all__ = ["ResNetSection", "resnet_const", "resnet50", "resnet101", "resnet152"]
+
+
+BN_MOMENTUM = 0.9
+BN_EPSILON = 1e-5
+
+BASE_NAME_SCOPE = "resnet"
+
+
+def _expand_name(prefix: str, suffix: str, sep: str = "."):
+ return prefix + sep + suffix
+
+
+def _input(
+ name: str,
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+) -> tensorflow.Tensor:
+ bn_axis = 3 if K.image_data_format() == "channels_last" else 1
+
+ x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name=_expand_name(name, "pad"))(
+ x_tens
+ )
+ x = layers.Conv2D(
+ 64, 7, strides=2, use_bias=False, name=_expand_name(name, "conv")
+ )(x)
+ x = layers.BatchNormalization(
+ axis=bn_axis, epsilon=BN_EPSILON, name=_expand_name(name, "bn")
+ )(x)
+ x = layers.Activation("relu", name=_expand_name(name, "relu"))(x)
+ x = layers.ZeroPadding2D(
+ padding=((1, 1), (1, 1)), name=_expand_name(name, "pool_pad")
+ )(x)
+ x = layers.MaxPooling2D(3, strides=2, name=_expand_name(name, "pool_pool"))(x)
+
+ return x
+
+
+def _identity_modifier(
+ name: str,
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ out_channels: int,
+ stride: int,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+) -> tensorflow.Tensor:
+ bn_axis = 3 if K.image_data_format() == "channels_last" else 1
+ shortcut = layers.Conv2D(
+ out_channels, 1, strides=stride, name=_expand_name(name, "conv")
+ )(x_tens)
+ shortcut = layers.BatchNormalization(
+ axis=bn_axis, epsilon=BN_EPSILON, name=_expand_name(name, "bn")
+ )(shortcut)
+ return shortcut
+
+
+def _bottleneck_block(
+ name: str,
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ out_channels: int,
+ proj_channels: int,
+ stride: int,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+) -> tensorflow.Tensor:
+ bn_axis = 3 if K.image_data_format() == "channels_last" else 1
+
+ x = layers.Conv2D(proj_channels, 1, name=_expand_name(name, "conv1"))(x_tens)
+ x = layers.BatchNormalization(
+ axis=bn_axis, epsilon=BN_EPSILON, name=_expand_name(name, "bn1")
+ )(x)
+ x = layers.Activation("relu", name=_expand_name(name, "relu1"))(x)
+
+ x = layers.ZeroPadding2D(
+ padding=((1, 1), (1, 1)), name=_expand_name(name, "pad_conv2")
+ )(x)
+ x = layers.Conv2D(
+ proj_channels, 3, strides=stride, name=_expand_name(name, "conv2")
+ )(x)
+ x = layers.BatchNormalization(
+ axis=bn_axis, epsilon=BN_EPSILON, name=_expand_name(name, "bn2")
+ )(x)
+ x = layers.Activation("relu", name=_expand_name(name, "relu2"))(x)
+
+ x = layers.Conv2D(out_channels, 1, name=_expand_name(name, "conv3"))(x)
+ x = layers.BatchNormalization(
+ axis=bn_axis, epsilon=BN_EPSILON, name=_expand_name(name, "bn3")
+ )(x)
+
+ if stride > 1 or int(x_tens.shape[3]) != out_channels:
+ shortcut = _identity_modifier(
+ _expand_name(name, "identity"),
+ x_tens,
+ training,
+ out_channels,
+ stride,
+ kernel_initializer=kernel_initializer,
+ bias_initializer=bias_initializer,
+ beta_initializer=beta_initializer,
+ gamma_initializer=gamma_initializer,
+ )
+ else:
+ shortcut = x_tens
+
+ x = layers.Add(name=_expand_name(name, "add"))([shortcut, x])
+ x = layers.Activation("relu", name=_expand_name(name, "out"))(x)
+
+ return x
+
+
+def _classifier(
+ name: str,
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ num_classes: int,
+ class_type: str,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+) -> tensorflow.Tensor:
+ x = layers.GlobalAveragePooling2D(name="avg_pool")(x_tens)
+ if num_classes:
+ if class_type:
+ if class_type == "single":
+ act = "softmax"
+ elif class_type == "multi":
+ act = "sigmoid"
+ else:
+ raise ValueError("unknown class_type given of {}".format(class_type))
+ else:
+ act = None
+
+ outputs = layers.Dense(
+ num_classes, activation=act, name=_expand_name(name, "fc")
+ )(x)
+ else:
+ outputs = x
+ return outputs
+
+
+class ResNetSection(object):
+ """
+ Settings to describe how to put together a ResNet based architecture
+ using user supplied configurations.
+
+ :param num_blocks: the number of blocks to put in the section
+ (i.e. Basic or Bottleneck blocks)
+ :param out_channels: the number of output channels from the section
+ :param downsample: True to apply stride 2 for downsampling of the input,
+ False otherwise
+ :param proj_channels: The number of channels in the projection for a
+ bottleneck block, if < 0 then uses basic
+ """
+
+ def __init__(
+ self,
+ num_blocks: int,
+ out_channels: int,
+ downsample: bool,
+ proj_channels: int = -1,
+ ):
+ self.num_blocks = num_blocks
+ self.out_channels = out_channels
+ self.downsample = downsample
+ self.proj_channels = proj_channels
+
+ def create(
+ self,
+ name: str,
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ ) -> tensorflow.Tensor:
+ """
+ Create the section in the current graph and scope
+
+ :param name: the name for the scope to create the section under
+ :param x_tens: The input tensor to the ResNet architecture
+ :param training: bool or Tensor to specify if the model should be run
+ in training or inference mode
+ :param kernel_initializer: Initializer to use for the conv and
+ fully connected kernels
+ :param bias_initializer: Initializer to use for the bias in the fully connected
+ :param beta_initializer: Initializer to use for the batch norm beta variables
+ :param gamma_initializer: Initializer to use for the batch norm gamma variables
+ :return: the output tensor from the section
+ """
+ out = x_tens
+
+ stride = 2 if self.downsample else 1
+
+ for block in range(self.num_blocks):
+ block_name = _expand_name(name, "{}".format(block))
+ if self.proj_channels > 0:
+ out = _bottleneck_block(
+ name=block_name,
+ x_tens=out,
+ training=training,
+ out_channels=self.out_channels,
+ proj_channels=self.proj_channels,
+ stride=stride,
+ kernel_initializer=kernel_initializer,
+ bias_initializer=bias_initializer,
+ beta_initializer=beta_initializer,
+ gamma_initializer=gamma_initializer,
+ )
+ else:
+ out = _basic_block(
+ name=block_name,
+ x_tens=out,
+ training=training,
+ out_channels=self.out_channels,
+ stride=stride,
+ kernel_initializer=kernel_initializer,
+ bias_initializer=bias_initializer,
+ beta_initializer=beta_initializer,
+ gamma_initializer=gamma_initializer,
+ )
+
+ stride = 1
+
+ return out
+
+
+def resnet_const(
+ x_tens: tensorflow.Tensor,
+ training: Union[bool, tensorflow.Tensor],
+ sec_settings: List[ResNetSection],
+ num_classes: int,
+ class_type: str,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+) -> keras.models.Model:
+ """
+ Graph constructor for ResNet implementation.
+
+ :param x_tens: The input tensor to the ResNet architecture
+ :param training: bool or Tensor to specify if the model should be run
+ in training or inference mode
+ :param sec_settings: The settings for each section in the ResNet model
+ :param num_classes: The number of classes to classify
+ :param class_type: One of [single, multi, None] to support multi class training.
+ Default single. If None, then will not add the fully connected at the end.
+ :param kernel_initializer: Initializer to use for the conv and
+ fully connected kernels
+ :param bias_initializer: Initializer to use for the bias in the fully connected
+ :param beta_initializer: Initializer to use for the batch norm beta variables
+ :param gamma_initializer: Initializer to use for the batch norm gamma variables
+ :return: the output tensor from the created graph
+ """
+ channels_last = K.image_data_format() == "channels_last"
+ if x_tens is None:
+ input_shape = (224, 224, 3) if channels_last else (3, 224, 224)
+ x_tens = layers.Input(shape=input_shape)
+
+ out = _input(
+ "input",
+ x_tens,
+ training,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ )
+
+ for sec_index, section in enumerate(sec_settings):
+ out = section.create(
+ name="sections.{}".format(sec_index),
+ x_tens=out,
+ training=training,
+ kernel_initializer=kernel_initializer,
+ bias_initializer=bias_initializer,
+ beta_initializer=beta_initializer,
+ gamma_initializer=gamma_initializer,
+ )
+
+ outputs = _classifier(
+ "classifier",
+ out,
+ training,
+ num_classes,
+ class_type,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ )
+
+ return Model(inputs=x_tens, outputs=outputs)
+
+
+@ModelRegistry.register(
+ key=["resnet50", "resnet_50", "resnet-50", "resnetv1_50", "resnetv1-50"],
+ input_shape=(224, 224, 3),
+ domain="cv",
+ sub_domain="classification",
+ architecture="resnet_v1",
+ sub_architecture="50",
+ default_dataset="imagenet",
+ default_desc="base",
+)
+def resnet50(
+ inputs: tensorflow.Tensor = None,
+ training: Union[bool, tensorflow.Tensor] = True,
+ num_classes: int = 1000,
+ class_type: str = None,
+ kernel_initializer=keras.initializers.GlorotUniform(),
+ bias_initializer=keras.initializers.GlorotUniform(),
+ beta_initializer=keras.initializers.GlorotUniform(),
+ gamma_initializer=keras.initializers.GlorotUniform(),
+) -> keras.models.Model:
+ """
+ Standard ResNet50 implementation;
+ expected input shape is (B, 224, 224, 3)
+
+ :param inputs: The input tensor to the ResNet architecture
+ :param training: bool or Tensor to specify if the model should be run
+ in training or inference mode
+ :param num_classes: The number of classes to classify
+ :param class_type: One of [single, multi, None] to support multi class training.
+ Default single. If None, then will not add the fully connected at the end.
+ :param kernel_initializer: Initializer to use for the conv and
+ fully connected kernels
+ :param bias_initializer: Initializer to use for the bias in the fully connected
+ :param beta_initializer: Initializer to use for the batch norm beta variables
+ :param gamma_initializer: Initializer to use for the batch norm gamma variables
+ :return: the output tensor from the created graph
+ """
+ sec_settings = [
+ ResNetSection(
+ num_blocks=3,
+ out_channels=256,
+ downsample=False,
+ proj_channels=64,
+ ),
+ ResNetSection(
+ num_blocks=4,
+ out_channels=512,
+ downsample=True,
+ proj_channels=128,
+ ),
+ ResNetSection(
+ num_blocks=6,
+ out_channels=1024,
+ downsample=True,
+ proj_channels=256,
+ ),
+ ResNetSection(
+ num_blocks=3,
+ out_channels=2048,
+ downsample=True,
+ proj_channels=512,
+ ),
+ ]
+
+ return resnet_const(
+ inputs,
+ training,
+ sec_settings,
+ num_classes,
+ class_type,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ )
+
+
+@ModelRegistry.register(
+ key=["resnet101", "resnet_101", "resnet-101", "resnetv1_101", "resnetv1-101"],
+ input_shape=(224, 224, 3),
+ domain="cv",
+ sub_domain="classification",
+ architecture="resnet_v1",
+ sub_architecture="101",
+ default_dataset="imagenet",
+ default_desc="base",
+)
+def resnet101(
+ inputs: tensorflow.Tensor = None,
+ training: Union[bool, tensorflow.Tensor] = True,
+ num_classes: int = 1000,
+ class_type: str = None,
+ kernel_initializer=keras.initializers.GlorotUniform(),
+ bias_initializer=keras.initializers.GlorotUniform(),
+ beta_initializer=keras.initializers.GlorotUniform(),
+ gamma_initializer=keras.initializers.GlorotUniform(),
+) -> keras.models.Model:
+ """
+ Standard ResNet101 implementation;
+ expected input shape is (B, 224, 224, 3)
+
+ :param inputs: The input tensor to the ResNet architecture
+ :param training: bool or Tensor to specify if the model should be run
+ in training or inference mode
+ :param num_classes: The number of classes to classify
+ :param class_type: One of [single, multi, None] to support multi class training.
+ Default single. If None, then will not add the fully connected at the end.
+ :param kernel_initializer: Initializer to use for the conv and
+ fully connected kernels
+ :param bias_initializer: Initializer to use for the bias in the fully connected
+ :param beta_initializer: Initializer to use for the batch norm beta variables
+ :param gamma_initializer: Initializer to use for the batch norm gamma variables
+ :return: the output tensor from the created graph
+ """
+ sec_settings = [
+ ResNetSection(
+ num_blocks=3,
+ out_channels=256,
+ downsample=False,
+ proj_channels=64,
+ ),
+ ResNetSection(
+ num_blocks=4,
+ out_channels=512,
+ downsample=True,
+ proj_channels=128,
+ ),
+ ResNetSection(
+ num_blocks=23,
+ out_channels=1024,
+ downsample=True,
+ proj_channels=256,
+ ),
+ ResNetSection(
+ num_blocks=3,
+ out_channels=2048,
+ downsample=True,
+ proj_channels=512,
+ ),
+ ]
+
+ return resnet_const(
+ inputs,
+ training,
+ sec_settings,
+ num_classes,
+ class_type,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ )
+
+
+@ModelRegistry.register(
+ key=["resnet152", "resnet_152", "resnet-152", "resnetv1_152", "resnetv1-152"],
+ input_shape=(224, 224, 3),
+ domain="cv",
+ sub_domain="classification",
+ architecture="resnet_v1",
+ sub_architecture="152",
+ default_dataset="imagenet",
+ default_desc="base",
+)
+def resnet152(
+ inputs: tensorflow.Tensor = None,
+ training: Union[bool, tensorflow.Tensor] = True,
+ num_classes: int = 1000,
+ class_type: str = None,
+ kernel_initializer=keras.initializers.GlorotUniform(),
+ bias_initializer=keras.initializers.GlorotUniform(),
+ beta_initializer=keras.initializers.GlorotUniform(),
+ gamma_initializer=keras.initializers.GlorotUniform(),
+) -> keras.models.Model:
+ """
+ Standard ResNet152 implementation;
+ expected input shape is (B, 224, 224, 3)
+
+ :param inputs: The input tensor to the ResNet architecture
+ :param training: bool or Tensor to specify if the model should be run
+ in training or inference mode
+ :param num_classes: The number of classes to classify
+ :param class_type: One of [single, multi, None] to support multi class training.
+ Default single. If None, then will not add the fully connected at the end.
+ :param kernel_initializer: Initializer to use for the conv and
+ fully connected kernels
+ :param bias_initializer: Initializer to use for the bias in the fully connected
+ :param beta_initializer: Initializer to use for the batch norm beta variables
+ :param gamma_initializer: Initializer to use for the batch norm gamma variables
+ :return: the output tensor from the created graph
+ """
+ sec_settings = [
+ ResNetSection(
+ num_blocks=3,
+ out_channels=256,
+ downsample=False,
+ proj_channels=64,
+ ),
+ ResNetSection(
+ num_blocks=8,
+ out_channels=512,
+ downsample=True,
+ proj_channels=128,
+ ),
+ ResNetSection(
+ num_blocks=36,
+ out_channels=1024,
+ downsample=True,
+ proj_channels=256,
+ ),
+ ResNetSection(
+ num_blocks=3,
+ out_channels=2048,
+ downsample=True,
+ proj_channels=512,
+ ),
+ ]
+
+ return resnet_const(
+ inputs,
+ training,
+ sec_settings,
+ num_classes,
+ class_type,
+ kernel_initializer,
+ bias_initializer,
+ beta_initializer,
+ gamma_initializer,
+ )
+
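
A construction sketch for the ResNet helpers above; resnet50() (and the 101/152 variants) returns a standard Keras Model with a (224, 224, 3) input by default, and the same constructors are also reachable through ModelRegistry under the keys listed in their decorators.

from sparseml.keras.models.classification.resnet import resnet50

# build an ImageNet-sized classifier head with softmax outputs
model = resnet50(training=False, num_classes=1000, class_type="single")
model.summary()
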
Source code for sparseml.keras.models.registry
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Code related to the Keras model registry for easily creating models.
+"""
+
+from typing import Any, Callable, Dict, List, NamedTuple, Tuple, Union
+
+from merge_args import merge_args
+from sparseml import get_main_logger
+from sparseml.keras.utils import keras
+from sparseml.utils import KERAS_FRAMEWORK, parse_optimization_str, wrapper_decorator
+from sparsezoo import Zoo
+from sparsezoo.objects import Model
+
+
+__all__ = [
+ "ModelRegistry",
+]
+
+
+_LOGGER = get_main_logger()
+
+"""
+Simple named tuple object to store model info
+"""
+_ModelAttributes = NamedTuple(
+ "_ModelAttributes",
+ [
+ ("input_shape", Any),
+ ("domain", str),
+ ("sub_domain", str),
+ ("architecture", str),
+ ("sub_architecture", str),
+ ("default_dataset", str),
+ ("default_desc", str),
+ ("repo_source", str),
+ ],
+)
+
+
+class ModelRegistry(object):
+ """
+ Registry class for creating models
+ """
+
+ _CONSTRUCTORS = {} # type: Dict[str, Callable]
+ _ATTRIBUTES = {} # type: Dict[str, _ModelAttributes]
+
+ @staticmethod
+ def available_keys() -> List[str]:
+ """
+ :return: the keys (models) currently available in the registry
+ """
+ return list(ModelRegistry._CONSTRUCTORS.keys())
+
+ @staticmethod
+ def create(
+ key: str,
+ pretrained: Union[bool, str] = False,
+ pretrained_path: str = None,
+ pretrained_dataset: str = None,
+ **kwargs,
+ ) -> keras.Model:
+ """
+ Create a new model for the given key
+
+ :param key: the model key (name) to create
+ :param pretrained: True to load pretrained weights; to load a specific version
+ give a string with the name of the version (pruned-moderate, base).
+ Default None
+ :param pretrained_path: A model file path to load into the created model
+ :param pretrained_dataset: The dataset to load for the model
+ :param kwargs: any keyword args to supply to the model constructor
+ :return: the instantiated model
+ """
+ if key not in ModelRegistry._CONSTRUCTORS:
+ raise ValueError(
+ "key {} is not in the model registry; available: {}".format(
+ key, ModelRegistry._CONSTRUCTORS
+ )
+ )
+
+ return ModelRegistry._CONSTRUCTORS[key](
+ pretrained=pretrained,
+ pretrained_path=pretrained_path,
+ pretrained_dataset=pretrained_dataset,
+ **kwargs,
+ )
+
+ @staticmethod
+ def create_zoo_model(
+ key: str,
+ pretrained: Union[bool, str] = True,
+ pretrained_dataset: str = None,
+ ) -> Model:
+ """
+ Create a sparsezoo Model for the desired model in the zoo
+
+ :param key: the model key (name) to retrieve
+ :param pretrained: True to load pretrained weights; to load a specific version
+ give a string with the name of the version (optim, optim-perf), default True
+ :param pretrained_dataset: The dataset to load for the model
+ :return: the sparsezoo Model reference for the given model
+ """
+ if key not in ModelRegistry._CONSTRUCTORS:
+ raise ValueError(
+ "key {} is not in the model registry; available: {}".format(
+ key, ModelRegistry._CONSTRUCTORS
+ )
+ )
+
+ attributes = ModelRegistry._ATTRIBUTES[key]
+
+ optim_name, optim_category, optim_target = parse_optimization_str(
+ pretrained if isinstance(pretrained, str) else attributes.default_desc
+ )
+
+ return Zoo.load_model(
+ attributes.domain,
+ attributes.sub_domain,
+ attributes.architecture,
+ attributes.sub_architecture,
+ KERAS_FRAMEWORK,
+ attributes.repo_source,
+ attributes.default_dataset
+ if pretrained_dataset is None
+ else pretrained_dataset,
+ None,
+ optim_name,
+ optim_category,
+ optim_target,
+ )
+
+ @staticmethod
+ def input_shape(key: str) -> Any:
+ """
+ :param key: the model key (name) to create
+ :return: the specified input shape for the model
+ """
+ if key not in ModelRegistry._CONSTRUCTORS:
+ raise ValueError(
+ "key {} is not in the model registry; available: {}".format(
+ key, ModelRegistry._CONSTRUCTORS
+ )
+ )
+
+ return ModelRegistry._ATTRIBUTES[key].input_shape
+
+ @staticmethod
+ def register(
+ key: Union[str, List[str]],
+ input_shape: Any,
+ domain: str,
+ sub_domain: str,
+ architecture: str,
+ sub_architecture: str,
+ default_dataset: str,
+ default_desc: str,
+ repo_source: str = "sparseml",
+ ):
+ """
+ Register a model with the registry. Should be used as a decorator
+
+ :param key: the model key (name) to create
+ :param input_shape: the specified input shape for the model
+ :param domain: the domain the model belongs to; ex: cv, nlp, etc
+ :param sub_domain: the sub domain the model belongs to;
+ ex: classification, detection, etc
+ :param architecture: the architecture the model belongs to;
+ ex: resnet, mobilenet, etc
+ :param sub_architecture: the sub architecture the model belongs to;
+ ex: 50, 101, etc
+ :param default_dataset: the dataset to use by default for loading
+ pretrained if not supplied
+ :param default_desc: the description to use by default for loading
+ pretrained if not supplied
+ :param repo_source: the source repo for the model, default is sparseml
+ :return: the decorator
+ """
+ if not isinstance(key, List):
+ key = [key]
+
+ def decorator(const_func):
+ wrapped_constructor = ModelRegistry._registered_wrapper(key[0], const_func)
+
+ ModelRegistry.register_wrapped_model_constructor(
+ wrapped_constructor,
+ key,
+ input_shape,
+ domain,
+ sub_domain,
+ architecture,
+ sub_architecture,
+ default_dataset,
+ default_desc,
+ repo_source,
+ )
+ return wrapped_constructor
+
+ return decorator
+
+ @staticmethod
+ def register_wrapped_model_constructor(
+ wrapped_constructor: Callable,
+ key: Union[str, List[str]],
+ input_shape: Any,
+ domain: str,
+ sub_domain: str,
+ architecture: str,
+ sub_architecture: str,
+ default_dataset: str,
+ default_desc: str,
+ repo_source: str,
+ ):
+ """
+ Register a model with the registry from a model constructor or provider function
+
+ :param wrapped_constructor: Model constructor wrapped to be compatible
+ by call from ModelRegistry.create should have pretrained, pretrained_path,
+ pretrained_dataset, load_strict, ignore_error_tensors, and kwargs as
+ arguments
+ :param key: the model key (name) to create
+ :param input_shape: the specified input shape for the model
+ :param domain: the domain the model belongs to; ex: cv, nlp, etc
+ :param sub_domain: the sub domain the model belongs to;
+ ex: classification, detection, etc
+ :param architecture: the architecture the model belongs to;
+ ex: resnet, mobilenet, etc
+ :param sub_architecture: the sub architecture the model belongs to;
+ ex: 50, 101, etc
+ :param default_dataset: the dataset to use by default for loading
+ pretrained if not supplied
+ :param default_desc: the description to use by default for loading
+ pretrained if not supplied
+ :param repo_source: the source repo for the model; ex: sparseml, torchvision
+ :return: The constructor wrapper registered with the registry
+ """
+ if not isinstance(key, List):
+ key = [key]
+
+ for r_key in key:
+ if r_key in ModelRegistry._CONSTRUCTORS:
+ raise ValueError("key {} is already registered".format(key))
+
+ ModelRegistry._CONSTRUCTORS[r_key] = wrapped_constructor
+ ModelRegistry._ATTRIBUTES[r_key] = _ModelAttributes(
+ input_shape,
+ domain,
+ sub_domain,
+ architecture,
+ sub_architecture,
+ default_dataset,
+ default_desc,
+ repo_source,
+ )
+
+ @staticmethod
+ def _registered_wrapper(
+ key: str,
+ const_func: Callable,
+ ):
+ @merge_args(const_func)
+ @wrapper_decorator(const_func)
+ def wrapper(
+ pretrained_path: str = None,
+ pretrained: Union[bool, str] = False,
+ pretrained_dataset: str = None,
+ *args,
+ **kwargs,
+ ):
+ """
+ :param pretrained_path: A path to the pretrained weights to load,
+ if provided will override the pretrained param
+ :param pretrained: True to load the default pretrained weights,
+ a string to load a specific pretrained weight
+ (ex: base, optim, optim-perf),
+ or False to not load any pretrained weights
+ :param pretrained_dataset: The dataset to load pretrained weights for
+ (ex: imagenet, mnist, etc).
+ If not supplied will default to the one preconfigured for the model.
+ """
+ attributes = ModelRegistry._ATTRIBUTES[key]
+
+ if isinstance(pretrained, str):
+ if pretrained.lower() == "true":
+ pretrained = True
+ elif pretrained.lower() in ["false", "none"]:
+ pretrained = False
+
+ if pretrained_path:
+ model = const_func(*args, **kwargs)
+ try:
+ model.load_weights(pretrained_path)
+ except ValueError:
+ _LOGGER.info("Loading model from {}".format(pretrained_path))
+ model = keras.models.load_model(pretrained_path)
+ elif pretrained:
+ zoo_model = ModelRegistry.create_zoo_model(
+ key, pretrained, pretrained_dataset
+ )
+ model_file_paths = zoo_model.download_framework_files(
+ extensions=[".h5"]
+ )
+ if not model_file_paths:
+ model_file_paths = zoo_model.download_framework_files(
+ extensions=[".tf"]
+ )
+ if not model_file_paths:
+ raise RuntimeError("Error downloading model from SparseZoo")
+ model_file_path = model_file_paths[0]
+ model = keras.models.load_model(model_file_path)
+ else:
+ model = const_func(*args, **kwargs)
+ return model
+
+ return wrapper
+
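
A sketch of going through the registry above instead of calling the constructors directly; with pretrained set, the wrapper resolves a SparseZoo model via create_zoo_model and loads the downloaded .h5 or .tf weights, while a local pretrained_path (hypothetical path below) bypasses the zoo entirely. Which pretrained descriptions are actually available depends on what is published in the SparseZoo.

from sparseml.keras.models.registry import ModelRegistry

print(ModelRegistry.available_keys())

# architecture only, randomly initialized
model = ModelRegistry.create("resnet50", pretrained=False, num_classes=1000)

# load weights from a local checkpoint instead (hypothetical path)
local_model = ModelRegistry.create("resnet50", pretrained_path="/models/resnet50.h5")
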
Source code for sparseml.keras.optim.manager
from typing import List, Union
-import tensorflow as tf
+from tensorflow import Tensor
from sparseml.keras.optim.modifier import Modifier, ScheduledModifier
+from sparseml.keras.utils.compat import keras
from sparseml.keras.utils.logger import KerasLogger
from sparseml.optim import BaseManager
from sparseml.utils import load_recipe_yaml_str
@@ -248,11 +249,11 @@ Source code for sparseml.keras.optim.manager
def modify(
self,
- model: Union[tf.keras.Model, tf.keras.Sequential],
- optimizer: tf.keras.optimizers.Optimizer,
+ model: Union[keras.Model, keras.Sequential],
+ optimizer: keras.optimizers.Optimizer,
steps_per_epoch: int,
loggers: Union[KerasLogger, List[KerasLogger]] = None,
- input_tensors: tf.Tensor = None,
+ input_tensors: Tensor = None,
):
"""
Modify the model and optimizer based on the requirements of modifiers
@@ -283,14 +284,14 @@ Source code for sparseml.keras.optim.manager
continue
if isinstance(callback, list):
callbacks = callbacks + callback
- elif isinstance(callback, tf.keras.callbacks.Callback):
+ elif isinstance(callback, keras.callbacks.Callback):
callbacks.append(callback)
else:
raise RuntimeError("Invalid callback type")
self._optimizer = optimizer
return model, optimizer, callbacks
- def finalize(self, model: tf.keras.Model):
+ def finalize(self, model: keras.Model):
"""
Remove extra information related to the modifier from the model that is
not necessary for exporting
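
For context, a hedged sketch of how the manager shown in this diff is typically driven, assuming the module exports a ScheduledModifierManager with a from_yaml constructor as in SparseML's other framework integrations; the recipe path and the toy model below are hypothetical.

from tensorflow import keras

from sparseml.keras.optim import ScheduledModifierManager  # assumed export

model = keras.Sequential([keras.layers.Dense(10, input_shape=(784,))])
optimizer = keras.optimizers.Adam()

manager = ScheduledModifierManager.from_yaml("recipe.yaml")  # hypothetical recipe
model, optimizer, callbacks = manager.modify(model, optimizer, steps_per_epoch=100)

model.compile(optimizer=optimizer, loss="categorical_crossentropy")
# ... model.fit(x, y, callbacks=callbacks) ...
manager.finalize(model)  # drop modifier bookkeeping before export
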
diff --git a/sparseml/_modules/sparseml/keras/optim/mask_pruning.html b/sparseml/_modules/sparseml/keras/optim/mask_pruning.html
index c944425cb77..42dea705842 100644
--- a/sparseml/_modules/sparseml/keras/optim/mask_pruning.html
+++ b/sparseml/_modules/sparseml/keras/optim/mask_pruning.html
@@ -194,12 +194,13 @@ Source code for sparseml.keras.optim.mask_pruning
import inspect
from typing import List, Union
-import tensorflow as tf
+import tensorflow
from sparseml.keras.optim.mask_pruning_creator import (
PruningMaskCreator,
load_mask_creator,
)
+from sparseml.keras.utils import keras
__all__ = [
@@ -257,7 +258,7 @@ Source code for sparseml.keras.optim.mask_pruning
if "class_name" not in config:
raise ValueError("The 'class_name' not found in config: {}".format(config))
class_name = config["class_name"]
- return tf.keras.utils.deserialize_keras_object(
+ return keras.utils.deserialize_keras_object(
config,
module_objects=globals(),
custom_objects={class_name: PruningScheduler._REGISTRY[class_name]},
@@ -289,7 +290,7 @@ Source code for sparseml.keras.optim.mask_pruning
pruning_vars: List[MaskedParamInfo],
pruning_scheduler: PruningScheduler,
mask_creator: PruningMaskCreator,
- global_step: tf.Tensor,
+ global_step: tensorflow.Tensor,
):
self._pruning_vars = pruning_vars
self._pruning_scheduler = pruning_scheduler
@@ -298,18 +299,18 @@ Source code for sparseml.keras.optim.mask_pruning
self._update_ready = None
def _is_pruning_step(self) -> bool:
- global_step_val = tf.keras.backend.get_value(self._global_step)
+ global_step_val = keras.backend.get_value(self._global_step)
assert global_step_val >= 0
update_ready = self._pruning_scheduler.should_prune(global_step_val)
return update_ready
def _conditional_training_update(self):
def _no_update_masks_and_weights():
- return tf.no_op("no_update")
+ return tensorflow.no_op("no_update")
def _update_masks_and_weights():
assignments = []
- global_step_val = tf.keras.backend.get_value(self._global_step)
+ global_step_val = keras.backend.get_value(self._global_step)
for masked_param_info in self._pruning_vars:
new_sparsity = self._pruning_scheduler.target_sparsity(global_step_val)
new_mask = self._mask_creator.create_sparsity_mask(
@@ -317,17 +318,17 @@ Source code for sparseml.keras.optim.mask_pruning
)
assignments.append(masked_param_info.mask.assign(new_mask))
assignments.append(masked_param_info.sparsity.assign(new_sparsity))
- masked_param = tf.math.multiply(
+ masked_param = tensorflow.math.multiply(
masked_param_info.param, masked_param_info.mask
)
assignments.append(masked_param_info.param.assign(masked_param))
- return tf.group(assignments)
+ return tensorflow.group(assignments)
update_ready = self._is_pruning_step()
self._update_ready = update_ready
- return tf.cond(
- tf.cast(update_ready, tf.bool),
+ return tensorflow.cond(
+ tensorflow.cast(update_ready, tensorflow.bool),
_update_masks_and_weights,
_no_update_masks_and_weights,
)
@@ -338,11 +339,11 @@ Source code for sparseml.keras.optim.mask_pruning
"""
assignments = []
for masked_param_info in self._pruning_vars:
- masked_param = tf.math.multiply(
+ masked_param = tensorflow.math.multiply(
masked_param_info.param, masked_param_info.mask
)
assignments.append(masked_param_info.param.assign(masked_param))
- return tf.group(assignments)
+ return tensorflow.group(assignments)
def conditional_update(self, training=None):
"""
@@ -352,32 +353,34 @@ Source code for sparseml.keras.optim.mask_pruning
"""
def _update():
- with tf.control_dependencies([self._conditional_training_update()]):
- return tf.no_op("update")
+ with tensorflow.control_dependencies([self._conditional_training_update()]):
+ return tensorflow.no_op("update")
def _no_update():
- return tf.no_op("no_update")
+ return tensorflow.no_op("no_update")
- training = tf.keras.backend.learning_phase() if training is None else training
- return tf.cond(tf.cast(training, tf.bool), _update, _no_update)
+ training = keras.backend.learning_phase() if training is None else training
+ return tensorflow.cond(
+ tensorflow.cast(training, tensorflow.bool), _update, _no_update
+ )
_LAYER_PRUNABLE_PARAMS_MAP = {
- tf.keras.layers.Conv1D: ["kernel"],
- tf.keras.layers.Conv2D: ["kernel"],
- tf.keras.layers.Conv2DTranspose: ["kernel"],
- tf.keras.layers.Conv3D: ["kernel"],
- tf.keras.layers.Conv3DTranspose: ["kernel"],
- tf.keras.layers.Dense: ["kernel"],
- tf.keras.layers.Embedding: ["embeddings"],
- tf.keras.layers.LocallyConnected1D: ["kernel"],
- tf.keras.layers.LocallyConnected2D: ["kernel"],
- tf.keras.layers.SeparableConv1D: ["pointwise_kernel"],
- tf.keras.layers.SeparableConv2D: ["pointwise_kernel"],
+ keras.layers.Conv1D: ["kernel"],
+ keras.layers.Conv2D: ["kernel"],
+ keras.layers.Conv2DTranspose: ["kernel"],
+ keras.layers.Conv3D: ["kernel"],
+ keras.layers.Conv3DTranspose: ["kernel"],
+ keras.layers.Dense: ["kernel"],
+ keras.layers.Embedding: ["embeddings"],
+ keras.layers.LocallyConnected1D: ["kernel"],
+ keras.layers.LocallyConnected2D: ["kernel"],
+ keras.layers.SeparableConv1D: ["pointwise_kernel"],
+ keras.layers.SeparableConv2D: ["pointwise_kernel"],
}
-def _get_default_prunable_params(layer: tf.keras.layers.Layer):
+def _get_default_prunable_params(layer: keras.layers.Layer):
if layer.__class__ in _LAYER_PRUNABLE_PARAMS_MAP:
prunable_param_names = _LAYER_PRUNABLE_PARAMS_MAP[layer.__class__]
return {
@@ -393,7 +396,7 @@ Source code for sparseml.keras.optim.mask_pruning
)
-class MaskedLayer(tf.keras.layers.Wrapper):
+class MaskedLayer(keras.layers.Wrapper):
"""
    A masked layer wraps another layer with a pruning mask; if the enclosed layer
    is itself a MaskedLayer, the mask is shared with it
@@ -406,13 +409,13 @@ Source code for sparseml.keras.optim.mask_pruning
def __init__(
self,
- layer: tf.keras.layers.Layer,
+ layer: keras.layers.Layer,
pruning_scheduler: PruningScheduler,
mask_type: Union[str, List[int]] = "unstructured",
**kwargs,
):
if not isinstance(layer, MaskedLayer) and not isinstance(
- layer, tf.keras.layers.Layer
+ layer, keras.layers.Layer
):
raise ValueError(
"Invalid layer passed in, expected MaskedLayer or a keras Layer, "
@@ -434,8 +437,8 @@ Source code for sparseml.keras.optim.mask_pruning
self._global_step = self.add_weight(
"global_step",
shape=[],
- initializer=tf.keras.initializers.Constant(-1),
- dtype=tf.int64,
+ initializer=keras.initializers.Constant(-1),
+ dtype=tensorflow.int64,
trainable=False,
)
self._mask_updater = MaskAndWeightUpdater(
@@ -453,7 +456,7 @@ Source code for sparseml.keras.optim.mask_pruning
# for the "core", inner-most, Keras built-in layer
return self._layer.pruning_vars
- assert isinstance(self._layer, tf.keras.layers.Layer)
+ assert isinstance(self._layer, keras.layers.Layer)
prunable_params = _get_default_prunable_params(self._layer)
pruning_vars = []
@@ -461,35 +464,35 @@ Source code for sparseml.keras.optim.mask_pruning
mask = self.add_weight(
"mask",
shape=param.shape,
- initializer=tf.keras.initializers.get("ones"),
+ initializer=keras.initializers.get("ones"),
dtype=param.dtype,
trainable=False,
)
sparsity = self.add_weight(
"sparsity",
shape=[],
- initializer=tf.keras.initializers.get("zeros"),
+ initializer=keras.initializers.get("zeros"),
dtype=param.dtype,
trainable=False,
)
pruning_vars.append(MaskedParamInfo(name, param, mask, sparsity))
return pruning_vars
-[docs] def call(self, inputs: tf.Tensor, training=None):
+[docs] def call(self, inputs: tensorflow.Tensor, training=None):
"""
Forward function for calling layer instance as function
"""
- training = tf.keras.backend.learning_phase() if training is None else training
+ training = keras.backend.learning_phase() if training is None else training
def _apply_masks_to_weights():
- with tf.control_dependencies([self._mask_updater.apply_masks()]):
- return tf.no_op("update")
+ with tensorflow.control_dependencies([self._mask_updater.apply_masks()]):
+ return tensorflow.no_op("update")
def _no_apply_masks_to_weights():
- return tf.no_op("no_update_masks")
+ return tensorflow.no_op("no_update_masks")
- tf.cond(
- tf.cast(training, tf.bool),
+ tensorflow.cond(
+ tensorflow.cast(training, tensorflow.bool),
_apply_masks_to_weights,
_no_apply_masks_to_weights,
)
@@ -504,7 +507,7 @@ Source code for sparseml.keras.optim.mask_pruning
"""
Get layer config
Serialization and deserialization should be done using
- tf.keras.serialize/deserialize, which create and retrieve the "class_name"
+ keras.serialize/deserialize, which create and retrieve the "class_name"
field automatically.
The resulting config below therefore does not contain the field.
"""
@@ -522,11 +525,11 @@ Source code for sparseml.keras.optim.mask_pruning
[docs] @classmethod
def from_config(cls, config):
config = config.copy()
- layer = tf.keras.layers.deserialize(
+ layer = keras.layers.deserialize(
config.pop("layer"), custom_objects={"MaskedLayer": MaskedLayer}
)
if not isinstance(layer, MaskedLayer) and not isinstance(
- layer, tf.keras.layers.Layer
+ layer, keras.layers.Layer
):
raise RuntimeError("Unexpected layer created from config")
pruning_scheduler = PruningScheduler.deserialize(
@@ -561,7 +564,7 @@ Source code for sparseml.keras.optim.mask_pruning
def pruned_layer(self):
if isinstance(self._layer, MaskedLayer):
return self._layer.pruned_layer
- elif isinstance(self._layer, tf.keras.layers.Layer):
+ elif isinstance(self._layer, keras.layers.Layer):
return self._layer
else:
raise RuntimeError("Unrecognized layer")
@@ -571,21 +574,38 @@ Source code for sparseml.keras.optim.mask_pruning
return self._layer
-[docs]def remove_pruning_masks(model: tf.keras.Model):
+[docs]def remove_pruning_masks(model: keras.Model):
"""
Remove pruning masks from a model that was pruned using the MaskedLayer logic
:param model: a model that was pruned using MaskedLayer
:return: the original model with pruned weights
"""
+ def _get_pruned_layer(layer):
+        # If the model was loaded from the TF SavedModel format, the MaskedLayer
+        # class may come from a generated package, hence the fallback check
+        # below based simply on the class name
+ is_masked_layer = isinstance(
+ layer, MaskedLayer
+ ) or layer.__class__.__name__.endswith("MaskedLayer")
+ if is_masked_layer:
+ return _get_pruned_layer(layer.layer)
+ elif isinstance(layer, keras.layers.Layer):
+ return layer
+ else:
+ raise ValueError("Unknown layer type")
+
def _remove_pruning_masks(layer):
- if isinstance(layer, MaskedLayer):
- return layer.pruned_layer
+ is_masked_layer = isinstance(
+ layer, MaskedLayer
+ ) or layer.__class__.__name__.endswith("MaskedLayer")
+ if is_masked_layer:
+ return _get_pruned_layer(layer)
return layer
# TODO: while the resulting model could be exported to ONNX, its built status
# is removed
- return tf.keras.models.clone_model(
+ return keras.models.clone_model(
model, input_tensors=None, clone_function=_remove_pruning_masks
)
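A minimal usage sketch for the remove_pruning_masks helper above; pruned_model is assumed to be a keras.Model whose layers were wrapped in MaskedLayer during pruning.

# Hedged sketch: strip MaskedLayer wrappers before exporting a pruned model.
# Assumes `pruned_model` is a keras.Model produced by a pruning modifier.
from sparseml.keras.optim.mask_pruning import remove_pruning_masks

exportable_model = remove_pruning_masks(pruned_model)
# the clone keeps the zeroed (pruned) weights but drops mask/sparsity variables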
diff --git a/sparseml/_modules/sparseml/keras/optim/mask_pruning_creator.html b/sparseml/_modules/sparseml/keras/optim/mask_pruning_creator.html
index e5305105cac..a34db9f67e7 100644
--- a/sparseml/_modules/sparseml/keras/optim/mask_pruning_creator.html
+++ b/sparseml/_modules/sparseml/keras/optim/mask_pruning_creator.html
@@ -197,7 +197,7 @@ Source code for sparseml.keras.optim.mask_pruning_creator
from typing import Any, Callable, Iterable, List, Tuple, Union
import numpy
-import tensorflow as tf
+import tensorflow
__all__ = [
@@ -219,8 +219,8 @@ Source code for sparseml.keras.optim.mask_pruning_creator
[docs] @abstractmethod
def get_mask_initializer(
self,
- tensor: tf.Tensor,
- ) -> Callable[[], tf.Tensor]:
+ tensor: tensorflow.Tensor,
+ ) -> Callable[[], tensorflow.Tensor]:
"""
:param tensor: A tensor of a model layer's weights
:return: Tensor initializer function for this sparsity mask
@@ -230,9 +230,9 @@ Source code for sparseml.keras.optim.mask_pruning_creator
[docs] @abstractmethod
def create_sparsity_mask(
self,
- tensor: tf.Tensor,
- sparsity: tf.Tensor,
- ) -> tf.Tensor:
+ tensor: tensorflow.Tensor,
+ sparsity: tensorflow.Tensor,
+ ) -> tensorflow.Tensor:
"""
:param tensor: A tensor of a model layer's weights
:param sparsity: the target sparsity to use for assigning the masks
@@ -251,8 +251,8 @@ Source code for sparseml.keras.optim.mask_pruning_creator
[docs] def get_mask_initializer(
self,
- tensor: tf.Tensor,
- ) -> Callable[[], tf.Tensor]:
+ tensor: tensorflow.Tensor,
+ ) -> Callable[[], tensorflow.Tensor]:
"""
:param tensor: A tensor of a model layer's weights
:return: Initializer for tensor where an element is 1.0 for nonzero weights
@@ -261,44 +261,46 @@ Source code for sparseml.keras.optim.mask_pruning_creator
"""
def non_zero_mask_initializer(
- shape: tf.TensorShape,
- dtype: tf.DType = tf.float32,
+ shape: tensorflow.TensorShape,
+ dtype: tensorflow.DType = tensorflow.float32,
            partition_info: Any = None,  # unused variable for compatibility
- ) -> tf.Tensor:
- dtype = tf.as_dtype(dtype)
- if not dtype.is_numpy_compatible or dtype == tf.string:
+ ) -> tensorflow.Tensor:
+ dtype = tensorflow.as_dtype(dtype)
+ if not dtype.is_numpy_compatible or dtype == tensorflow.string:
raise ValueError("Expected numeric or boolean dtype, got %s." % dtype)
- return tf.cast(tf.not_equal(tensor, 0.0), dtype=dtype)
+ return tensorflow.cast(tensorflow.not_equal(tensor, 0.0), dtype=dtype)
return non_zero_mask_initializer
[docs] def create_sparsity_mask(
self,
- tensor: tf.Tensor,
- sparsity: tf.Tensor,
- ) -> tf.Tensor:
+ tensor: tensorflow.Tensor,
+ sparsity: tensorflow.Tensor,
+ ) -> tensorflow.Tensor:
"""
:param tensor: A tensor of a model layer's weights
:param sparsity: the target sparsity to use for assigning the masks
:return: A sparsity mask close to the set sparsity based on the values of
the input tensor
"""
- abs_var = tf.abs(tensor) # Magnitudes of weights
- sparse_threshold_index = tf.cast(
- tf.round(tf.cast(tf.size(abs_var), tf.float32) * sparsity),
- tf.int32,
+ abs_var = tensorflow.abs(tensor) # Magnitudes of weights
+ sparse_threshold_index = tensorflow.cast(
+ tensorflow.round(
+ tensorflow.cast(tensorflow.size(abs_var), tensorflow.float32) * sparsity
+ ),
+ tensorflow.int32,
)
- sparse_threshold_index = tf.minimum(
- tf.maximum(sparse_threshold_index, 0),
- tf.size(tensor) - 1,
+ sparse_threshold_index = tensorflow.minimum(
+ tensorflow.maximum(sparse_threshold_index, 0),
+ tensorflow.size(tensor) - 1,
)
try:
- argsort = tf.argsort
+ argsort = tensorflow.argsort
except Exception:
try:
- argsort = tf.contrib.framework.argsort
+ argsort = tensorflow.contrib.framework.argsort
except Exception:
raise RuntimeError(
"cannot find argsort function in tensorflow_v1, "
@@ -306,16 +308,16 @@ Source code for sparseml.keras.optim.mask_pruning_creator
)
# produce tensor where each element is the index in sorted order of abs_var
- abs_var_flat = tf.reshape(abs_var, [-1])
- element_ranks_flat = tf.scatter_nd(
- tf.expand_dims(argsort(abs_var_flat), 1),
- tf.range(abs_var_flat.get_shape()[0]),
+ abs_var_flat = tensorflow.reshape(abs_var, [-1])
+ element_ranks_flat = tensorflow.scatter_nd(
+ tensorflow.expand_dims(argsort(abs_var_flat), 1),
+ tensorflow.range(abs_var_flat.get_shape()[0]),
abs_var_flat.get_shape(),
)
- element_ranks = tf.reshape(element_ranks_flat, abs_var.get_shape())
- return tf.cast(
- tf.math.greater_equal(element_ranks, sparse_threshold_index),
- tf.float32,
+ element_ranks = tensorflow.reshape(element_ranks_flat, abs_var.get_shape())
+ return tensorflow.cast(
+ tensorflow.math.greater_equal(element_ranks, sparse_threshold_index),
+ tensorflow.float32,
)
def __str__(self):
@@ -333,13 +335,13 @@ Source code for sparseml.keras.optim.mask_pruning_creator
"""
_GROUPING_OPS = {
- "mean": tf.reduce_mean,
- "max": tf.reduce_max,
- "min": tf.reduce_min,
+ "mean": tensorflow.reduce_mean,
+ "max": tensorflow.reduce_max,
+ "min": tensorflow.reduce_min,
}
[docs] @staticmethod
- def get_grouping_op(grouping_op_name: str) -> tf.Operation:
+ def get_grouping_op(grouping_op_name: str) -> tensorflow.Operation:
"""
:param grouping_op_name: name of grouping operation to get tf operation for
:return: tf operation for grouping_op_name if available, raises error otherwise
@@ -351,7 +353,7 @@ Source code for sparseml.keras.optim.mask_pruning_creator
return GroupedPruningMaskCreator._GROUPING_OPS[grouping_op_name]
[docs] @abstractmethod
- def group_tensor(self, tensor: tf.Tensor) -> tf.Tensor:
+ def group_tensor(self, tensor: tensorflow.Tensor) -> tensorflow.Tensor:
"""
:param tensor: The tensor to reduce in groups
:return: The grouped tensor
@@ -361,9 +363,9 @@ Source code for sparseml.keras.optim.mask_pruning_creator
@abstractmethod
def _map_mask_to_tensor(
self,
- grouped_mask: tf.Tensor,
- original_tensor_shape: tf.TensorShape,
- ) -> tf.Tensor:
+ grouped_mask: tensorflow.Tensor,
+ original_tensor_shape: tensorflow.TensorShape,
+ ) -> tensorflow.Tensor:
"""
:param grouped_mask: A binary mask the size of a tensor from group_tensor
:param original_tensor_shape: Shape of the original tensor grouped_mask
@@ -375,33 +377,33 @@ Source code for sparseml.keras.optim.mask_pruning_creator
[docs] def get_mask_initializer(
self,
- tensor: tf.Tensor,
- ) -> Callable[[], tf.Tensor]:
+ tensor: tensorflow.Tensor,
+ ) -> Callable[[], tensorflow.Tensor]:
"""
:param tensor: A tensor of a model layer's weights
:return: Tensor initializer function for this sparsity mask
"""
def grouped_non_zero_mask_initializer(
- shape: tf.TensorShape,
- dtype: tf.DType = tf.float32,
+ shape: tensorflow.TensorShape,
+ dtype: tensorflow.DType = tensorflow.float32,
            partition_info: Any = None,  # unused variable for compatibility
- ) -> tf.Tensor:
- dtype = tf.as_dtype(dtype)
- if not dtype.is_numpy_compatible or dtype == tf.string:
+ ) -> tensorflow.Tensor:
+ dtype = tensorflow.as_dtype(dtype)
+ if not dtype.is_numpy_compatible or dtype == tensorflow.string:
raise ValueError("Expected numeric or boolean dtype, got %s." % dtype)
grouped_tensor = self.group_tensor(tensor)
- grouped_mask = tf.not_equal(grouped_tensor, 0.0)
+ grouped_mask = tensorflow.not_equal(grouped_tensor, 0.0)
mask = self._map_mask_to_tensor(grouped_mask, tensor.shape)
- return tf.cast(mask, dtype=dtype)
+ return tensorflow.cast(mask, dtype=dtype)
return grouped_non_zero_mask_initializer
[docs] def create_sparsity_mask(
self,
- tensor: tf.Tensor,
- sparsity: tf.Tensor,
- ) -> tf.Tensor:
+ tensor: tensorflow.Tensor,
+ sparsity: tensorflow.Tensor,
+ ) -> tensorflow.Tensor:
"""
:param tensor: A tensor of a model layer's weights
:param sparsity: the target sparsity to use for assigning the masks
@@ -444,7 +446,7 @@ Source code for sparseml.keras.optim.mask_pruning_creator
)
)
- def _set_dim_by_name_for_tensor(self, tensor: tf.Tensor):
+ def _set_dim_by_name_for_tensor(self, tensor: tensorflow.Tensor):
n_dims = len(tensor.shape)
if n_dims <= 2:
if self._dim_name == "channel":
@@ -467,7 +469,7 @@ Source code for sparseml.keras.optim.mask_pruning_creator
)
)
-[docs] def group_tensor(self, tensor: tf.Tensor) -> tf.Tensor:
+[docs] def group_tensor(self, tensor: tensorflow.Tensor) -> tensorflow.Tensor:
"""
:param tensor: The tensor to transform
:return: The absolute mean values of the tensor grouped by the
@@ -478,16 +480,16 @@ Source code for sparseml.keras.optim.mask_pruning_creator
n_dims = len(tensor.shape)
reduced_axis = [idx for idx in range(n_dims) if idx not in self._dim]
return self._grouping_op(
- tf.abs(tensor),
+ tensorflow.abs(tensor),
axis=reduced_axis,
keepdims=True,
)
def _map_mask_to_tensor(
self,
- grouped_mask: tf.Tensor,
- original_tensor_shape: tf.TensorShape,
- ) -> tf.Tensor:
+ grouped_mask: tensorflow.Tensor,
+ original_tensor_shape: tensorflow.TensorShape,
+ ) -> tensorflow.Tensor:
"""
:param grouped_mask: A binary mask the size of a tensor from group_tensor
:param original_tensor_shape: Shape of the original tensor grouped_mask
@@ -496,12 +498,12 @@ Source code for sparseml.keras.optim.mask_pruning_creator
original_tensor_shape
"""
# using tile instead of broadcast_to for compatibility with older tf versions
- # equivalent to: tf.broadcast_to(grouped_mask, original_tensor_shape)
+ # equivalent to: tensorflow.broadcast_to(grouped_mask, original_tensor_shape)
tile_vals = [
dim if idx not in self._dim else 1
for (idx, dim) in enumerate(original_tensor_shape)
]
- return tf.tile(grouped_mask, tile_vals)
+ return tensorflow.tile(grouped_mask, tile_vals)
def __str__(self):
if self._dim_name is not None:
@@ -538,7 +540,7 @@ Source code for sparseml.keras.optim.mask_pruning_creator
self._block_shape = block_shape
self._grouping_op = GroupedPruningMaskCreator.get_grouping_op(grouping_op_name)
-[docs] def group_tensor(self, tensor: tf.Tensor) -> tf.Tensor:
+[docs] def group_tensor(self, tensor: tensorflow.Tensor) -> tensorflow.Tensor:
"""
:param tensor: The tensor to transform
:return: The absolute mean values of the tensor grouped by blocks of
@@ -549,16 +551,18 @@ Source code for sparseml.keras.optim.mask_pruning_creator
n_dims = len(tensor.shape)
if n_dims >= 3:
tens_trans_dims = [n_dims - 2, n_dims - 1, *range(n_dims - 2)]
- tensor = tf.transpose(tensor, tens_trans_dims)
- blocked_tens = tf.reshape(tensor, blocked_tens_shape)
- reduced_blocks = self._grouping_op(tf.abs(blocked_tens), 1, keepdims=True)
+ tensor = tensorflow.transpose(tensor, tens_trans_dims)
+ blocked_tens = tensorflow.reshape(tensor, blocked_tens_shape)
+ reduced_blocks = self._grouping_op(
+ tensorflow.abs(blocked_tens), 1, keepdims=True
+ )
return reduced_blocks
def _map_mask_to_tensor(
self,
- grouped_mask: tf.Tensor,
- original_tensor_shape: tf.TensorShape,
- ) -> tf.Tensor:
+ grouped_mask: tensorflow.Tensor,
+ original_tensor_shape: tensorflow.TensorShape,
+ ) -> tensorflow.Tensor:
"""
:param grouped_mask: A binary mask the size of a tensor from group_tensor
:param original_tensor_shape: Shape of the original tensor grouped_mask
@@ -572,8 +576,8 @@ Source code for sparseml.keras.optim.mask_pruning_creator
) = self._get_blocked_tens_shape_and_validate(original_tensor_shape)
block_values_shape = [blocked_tens_shape[0], blocked_tens_shape[2]]
# expand so every element has a corresponding value in the original tensor
- block_mask = tf.reshape(grouped_mask, block_values_shape)
- block_mask = tf.expand_dims(block_mask, 1)
+ block_mask = tensorflow.reshape(grouped_mask, block_values_shape)
+ block_mask = tensorflow.expand_dims(block_mask, 1)
# Recover reduced dimension of block_mask, using tile instead of broadcast_to
# for compatibility with older versions of tf
@@ -582,21 +586,21 @@ Source code for sparseml.keras.optim.mask_pruning_creator
int(block_dim / mask_dim)
for (block_dim, mask_dim) in zip(blocked_tens_shape, block_mask_shape)
]
- # equivalent to: tf.broadcast_to(block_mask, blocked_tens_shape)
- tensor_mask_blocked = tf.tile(block_mask, tile_shape)
+ # equivalent to: tensorflow.broadcast_to(block_mask, blocked_tens_shape)
+ tensor_mask_blocked = tensorflow.tile(block_mask, tile_shape)
- mask = tf.reshape(tensor_mask_blocked, original_tensor_shape)
+ mask = tensorflow.reshape(tensor_mask_blocked, original_tensor_shape)
# Undo channel / kernel transpose if applicable
n_dims = len(original_tensor_shape)
if n_dims >= 3:
tens_trans_dims = [*range(2, n_dims), 0, 1]
- mask = tf.transpose(mask, tens_trans_dims)
+ mask = tensorflow.transpose(mask, tens_trans_dims)
return mask
def _get_blocked_tens_shape_and_validate(
self,
- tens_shape: tf.TensorShape,
- ) -> Tuple[List[int], tf.TensorShape]:
+ tens_shape: tensorflow.TensorShape,
+ ) -> Tuple[List[int], tensorflow.TensorShape]:
"""
:param tens_shape: The shape of the tensor to group in blocks
:return: shape of tens when blocked by block_shape and the original
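A rough sketch of exercising a magnitude mask creator from this module in eager TF2; the UnstructuredPruningMaskCreator class name is an assumption here, since the class definition is elided from the hunk above.

# Hedged sketch (class name assumed): build a 75%-sparsity magnitude mask.
import tensorflow
from sparseml.keras.optim.mask_pruning_creator import UnstructuredPruningMaskCreator

creator = UnstructuredPruningMaskCreator()
weights = tensorflow.random.normal([64, 64])
mask = creator.create_sparsity_mask(weights, tensorflow.constant(0.75))
print(float(tensorflow.reduce_mean(mask)))  # ~0.25: roughly a quarter of entries kept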
diff --git a/sparseml/_modules/sparseml/keras/optim/modifier.html b/sparseml/_modules/sparseml/keras/optim/modifier.html
index cc6c1707433..d9e47874089 100644
--- a/sparseml/_modules/sparseml/keras/optim/modifier.html
+++ b/sparseml/_modules/sparseml/keras/optim/modifier.html
@@ -198,9 +198,9 @@ Source code for sparseml.keras.optim.modifier
from typing import List, Tuple, Union
-import tensorflow as tf
+from tensorflow import Tensor
-from sparseml.keras.utils import KerasLogger
+from sparseml.keras.utils import KerasLogger, keras
from sparseml.optim import (
BaseModifier,
BaseScheduled,
@@ -215,6 +215,7 @@ Source code for sparseml.keras.optim.modifier
"ModifierProp",
"KerasModifierYAML",
"Modifier",
+ "ModifierProp",
"ScheduledModifier",
"ScheduledUpdateModifier",
]
@@ -269,7 +270,7 @@ Source code for sparseml.keras.optim.modifier
optimizer,
steps_per_epoch: int,
loggers: Union[KerasLogger, List[KerasLogger]] = None,
- input_tensors: tf.Tensor = None,
+ input_tensors: Tensor = None,
):
"""
Modify model, optimizer based on the logic of the modifier. Return the modified
@@ -284,7 +285,7 @@ Source code for sparseml.keras.optim.modifier
callback = None
return model, optimizer, callback
-[docs] def finalize(self, model: tf.keras.Model):
+[docs] def finalize(self, model: keras.Model):
"""
Remove extra information related to the modifier from the model that is
not necessary for exporting
@@ -339,14 +340,6 @@ Source code for sparseml.keras.optim.modifier
**kwargs,
)
- @property
- def start_epoch(self):
- return self._start_epoch
-
- @property
- def end_epoch(self):
- return self._end_epoch
-
[docs] def start_end_steps(self, steps_per_epoch, after_optim: bool) -> Tuple[int, int]:
"""
Calculate the start and end steps for this modifier given a certain
diff --git a/sparseml/_modules/sparseml/keras/optim/modifier_lr.html b/sparseml/_modules/sparseml/keras/optim/modifier_lr.html
index 688997aae82..a31e3c9585f 100644
--- a/sparseml/_modules/sparseml/keras/optim/modifier_lr.html
+++ b/sparseml/_modules/sparseml/keras/optim/modifier_lr.html
@@ -195,16 +195,14 @@ Source code for sparseml.keras.optim.modifier_lr
from typing import Dict, List, Union
-import tensorflow as tf
-from tensorflow.keras import backend as K
-from tensorflow.keras.optimizers.schedules import LearningRateSchedule
+from tensorflow import Tensor
from sparseml.keras.optim.modifier import (
KerasModifierYAML,
ScheduledModifier,
ScheduledUpdateModifier,
)
-from sparseml.keras.utils import KerasLogger, LoggerSettingCallback, LoggingMode
+from sparseml.keras.utils import KerasLogger, LoggerSettingCallback, LoggingMode, keras
from sparseml.optim import LearningRate, SetLearningRate
from sparseml.utils import ALL_TOKEN
@@ -212,7 +210,7 @@ Source code for sparseml.keras.optim.modifier_lr
__all__ = ["SetLearningRateModifier", "LearningRateModifier"]
-class LRModifierCallback(tf.keras.callbacks.Callback):
+class LRModifierCallback(keras.callbacks.Callback):
"""
Callback to modify learning rate of an optimizer
@@ -224,10 +222,10 @@ Source code for sparseml.keras.optim.modifier_lr
def __init__(
self,
- optimizer: tf.keras.optimizers.Optimizer,
+ optimizer: keras.optimizers.Optimizer,
start_step: int,
end_step: int,
- learning_rate: Union[float, tf.keras.optimizers.schedules.LearningRateSchedule],
+ learning_rate: Union[float, keras.optimizers.schedules.LearningRateSchedule],
):
self._optimizer = optimizer
self._start_step = start_step
@@ -241,7 +239,10 @@ Source code for sparseml.keras.optim.modifier_lr
:param logs: dictionary of logs (see Keras Callback doc)
"""
- self._step = tf.keras.backend.get_value(self._optimizer.iterations)
+ self._step = keras.backend.get_value(self._optimizer.iterations)
+
+ def on_batch_begin(self, batch, logs=None):
+ self.on_train_batch_begin(batch, logs=logs)
def on_train_batch_begin(self, batch, logs=None):
"""
@@ -257,6 +258,9 @@ Source code for sparseml.keras.optim.modifier_lr
persist_lr = self._optimizer.lr(self._step)
setattr(self._optimizer, "lr", persist_lr)
+ def on_batch_end(self, batch, logs=None):
+ self.on_train_batch_end(batch, logs=logs)
+
def on_train_batch_end(self, batch, logs=None):
"""
Called at the end of a batch in training
@@ -299,7 +303,7 @@ Source code for sparseml.keras.optim.modifier_lr
:param logs: dictionary of logs (see Keras Callback doc)
"""
super().on_train_begin(logs)
- self._step = K.get_value(self.model.optimizer.iterations)
+ self._step = keras.backend.get_value(self.model.optimizer.iterations)
def on_epoch_begin(self, epoch, logs=None):
"""
@@ -346,10 +350,10 @@ Source code for sparseml.keras.optim.modifier_lr
def _get_lr(self):
lr = self.model.optimizer.lr
- if isinstance(lr, LearningRateSchedule):
+ if isinstance(lr, keras.optimizers.schedules.LearningRateSchedule):
lr_val = lr(self.model.optimizer.iterations)
else:
- lr_val = K.get_value(lr)
+ lr_val = keras.backend.get_value(lr)
return lr_val
def _is_logging_step(self):
@@ -404,7 +408,7 @@ Source code for sparseml.keras.optim.modifier_lr
optimizer,
steps_per_epoch: int,
loggers: Union[KerasLogger, List[KerasLogger]] = None,
- input_tensors: tf.Tensor = None,
+ input_tensors: Tensor = None,
):
"""
Modify model and optimizer, and provide callbacks to process the model
@@ -432,7 +436,7 @@ Source code for sparseml.keras.optim.modifier_lr
return model, optimizer, [lr_callback, lr_logging_callback]
-class _ExponentialDecay(tf.keras.optimizers.schedules.ExponentialDecay):
+class _ExponentialDecay(keras.optimizers.schedules.ExponentialDecay):
def __init__(
self,
start_step,
@@ -463,14 +467,14 @@ Source code for sparseml.keras.optim.modifier_lr
def get_config(self):
config = super().get_config()
- config = config.update({"start_step": self.start_step})
+ config.update({"start_step": self.start_step})
return config
-class _PiecewiseConstantDecay(tf.keras.optimizers.schedules.PiecewiseConstantDecay):
+class _PiecewiseConstantDecay(keras.optimizers.schedules.PiecewiseConstantDecay):
def __init__(self, start_step, boundaries, values, name=None):
super().__init__(boundaries, values, name=name)
- self._start_step
+ self._start_step = start_step
@property
def start_step(self):
@@ -484,7 +488,7 @@ Source code for sparseml.keras.optim.modifier_lr
def get_config(self):
config = super().get_config()
- config = config.update({"start_step": self.start_step})
+ config.update({"start_step": self.start_step})
return config
@@ -560,7 +564,8 @@ Source code for sparseml.keras.optim.modifier_lr
elif lr_class == "MultiStepLR":
boundaries = lr_kwargs["milestones"]
values = [
- self.init_lr * (lr_kwargs["gamma"] ^ k) for k in range(len(boundaries))
+ self.init_lr * (lr_kwargs["gamma"] ** k)
+ for k in range(len(boundaries) + 1)
]
learning_rate = _PiecewiseConstantDecay(
start_step, boundaries, values, name="MultiStepLR"
@@ -584,7 +589,7 @@ Source code for sparseml.keras.optim.modifier_lr
optimizer,
steps_per_epoch: int,
loggers: Union[KerasLogger, List[KerasLogger]] = None,
- input_tensors: tf.Tensor = None,
+ input_tensors: Tensor = None,
):
"""
Modify model and optimizer, and provide callbacks to process the model
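A quick arithmetic check of the MultiStepLR values fix earlier in this file (gamma ** k instead of gamma ^ k, and one value per interval):

# Worked check for the MultiStepLR fix: two milestones yield three LR values.
init_lr, gamma, boundaries = 0.1, 0.1, [5, 8]
values = [init_lr * (gamma ** k) for k in range(len(boundaries) + 1)]
# values ~= [0.1, 0.01, 0.001]: the LR before the first milestone, between the
# two milestones, and after the last milestone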
diff --git a/sparseml/_modules/sparseml/keras/optim/modifier_params.html b/sparseml/_modules/sparseml/keras/optim/modifier_params.html
index f6817488c8f..ce2b7c1b8f7 100644
--- a/sparseml/_modules/sparseml/keras/optim/modifier_params.html
+++ b/sparseml/_modules/sparseml/keras/optim/modifier_params.html
@@ -196,7 +196,7 @@ Source code for sparseml.keras.optim.modifier_params
from typing import List, Union
-import tensorflow
+from tensorflow import Tensor
from sparseml.keras.optim.modifier import (
KerasModifierYAML,
@@ -204,13 +204,14 @@ Source code for sparseml.keras.optim.modifier_params
ScheduledModifier,
)
from sparseml.keras.optim.utils import get_layer_name_from_param
+from sparseml.keras.utils import keras
from sparseml.utils import ALL_TOKEN, convert_to_bool, flatten_iterable
__all__ = ["TrainableParamsModifier"]
-class TrainableParamsCallback(tensorflow.keras.callbacks.Callback):
+class TrainableParamsCallback(keras.callbacks.Callback):
def __init__(self, model, optimizer, layers, trainable, start_step, end_step):
self.model = model
self.optimizer = optimizer
@@ -222,7 +223,7 @@ Source code for sparseml.keras.optim.modifier_params
self.step = None
def on_train_begin(self, logs=None):
- self.step = tensorflow.keras.backend.get_value(self.optimizer.iterations)
+ self.step = keras.backend.get_value(self.optimizer.iterations)
def on_train_batch_begin(self, batch, logs=None):
if self.step == self.start_step:
@@ -372,7 +373,7 @@ Source code for sparseml.keras.optim.modifier_params
model,
optimizer,
steps_per_epoch: int,
- input_tensors: tensorflow.Tensor = None,
+ input_tensors: Tensor = None,
):
model, optimizer, callback = super(TrainableParamsModifier, self).modify(
model, optimizer, steps_per_epoch, input_tensors=input_tensors
diff --git a/sparseml/_modules/sparseml/keras/optim/modifier_pruning.html b/sparseml/_modules/sparseml/keras/optim/modifier_pruning.html
index 997014dfedd..ac132f6e482 100644
--- a/sparseml/_modules/sparseml/keras/optim/modifier_pruning.html
+++ b/sparseml/_modules/sparseml/keras/optim/modifier_pruning.html
@@ -210,8 +210,7 @@ Source code for sparseml.keras.optim.modifier_pruning
ScheduledUpdateModifier,
)
from sparseml.keras.optim.utils import get_layer_name_from_param
-from sparseml.keras.utils.callbacks import LoggerSettingCallback
-from sparseml.keras.utils.logger import KerasLogger
+from sparseml.keras.utils import KerasLogger, LoggerSettingCallback, keras
from sparseml.utils import ALL_TOKEN, convert_to_bool, validate_str_iterable
@@ -394,14 +393,14 @@ Source code for sparseml.keras.optim.modifier_pruning
:param step: training step
:param tensor: tensor (e.g., weight) to compute the sparsity
- :return: target sparsity
+ :return: target sparsity, or None
"""
if tensor is None:
raise ValueError("Invalid empty tensor")
if self._start_step <= step < self._end_step:
mask = tensorflow.cast(tensorflow.not_equal(tensor, 0.0), tensor.dtype)
- sparsity = tensorflow.math.reduce_sum(1.0 - mask).numpy() / tensorflow.size(
- tensor
+ sparsity = float(
+ tensorflow.math.reduce_sum(1.0 - mask).numpy() / tensorflow.size(tensor)
)
elif step == self._end_step:
sparsity = 0.0
@@ -419,7 +418,7 @@ Source code for sparseml.keras.optim.modifier_pruning
return config
-class PruningModifierCallback(tensorflow.keras.callbacks.Callback):
+class PruningModifierCallback(keras.callbacks.Callback):
"""
A callback to update masks and weights at the end of certain training step
@@ -437,8 +436,8 @@ Source code for sparseml.keras.optim.modifier_pruning
:param logs: dictionary of logs (see Keras Callback doc)
"""
- self.step = tensorflow.keras.backend.get_value(self.optim_iters)
- tensorflow.keras.backend.batch_set_value(
+ self.step = keras.backend.get_value(self.optim_iters)
+ keras.backend.batch_set_value(
[(layer.global_step, self.step) for layer in self.prunable_layers]
)
@@ -449,7 +448,7 @@ Source code for sparseml.keras.optim.modifier_pruning
:param batch: batch index in current epoch
:param logs: dictionary of logs (see Keras Callback doc)
"""
- tensorflow.keras.backend.batch_set_value(
+ keras.backend.batch_set_value(
[(layer.global_step, self.step) for layer in self.prunable_layers]
)
@@ -502,7 +501,7 @@ Source code for sparseml.keras.optim.modifier_pruning
:param logs: dictionary of logs (see Keras Callback doc)
"""
super().on_train_begin(logs)
- self._step = tensorflow.keras.backend.get_value(self._start_step)
+ self._step = keras.backend.get_value(self._start_step)
def on_epoch_end(self, epoch, logs=None):
"""
@@ -667,7 +666,7 @@ Source code for sparseml.keras.optim.modifier_pruning
sparsity_scheduler = SparsityFreezer(begin_step, end_step)
return sparsity_scheduler
- def _clone_layer(self, layer: tensorflow.keras.layers.Layer):
+ def _clone_layer(self, layer: keras.layers.Layer):
cloned_layer = layer
if layer.name in self.layer_names: # TODO: handle regex params
cloned_layer = MaskedLayer(
@@ -702,7 +701,7 @@ Source code for sparseml.keras.optim.modifier_pruning
input_tensors=input_tensors,
)
self._sparsity_scheduler = self._create_sparsity_scheduler(steps_per_epoch)
- cloned_model = tensorflow.keras.models.clone_model(
+ cloned_model = keras.models.clone_model(
model,
input_tensors,
clone_function=self._clone_layer,
@@ -716,7 +715,7 @@ Source code for sparseml.keras.optim.modifier_pruning
callbacks.append(sparsity_logging_callback)
return cloned_model, optimizer, callbacks
-[docs] def finalize(self, model: tensorflow.keras.Model):
+[docs] def finalize(self, model: keras.Model):
"""
Remove extra information related to the modifier from the model that is
not necessary for exporting
@@ -1037,7 +1036,7 @@ Source code for sparseml.keras.optim.modifier_pruning
)
return sparsity_scheduler
- def _clone_layer(self, layer: tensorflow.keras.layers.Layer):
+ def _clone_layer(self, layer: keras.layers.Layer):
cloned_layer = layer
if (
layer.name in self.layer_names
@@ -1078,7 +1077,7 @@ Source code for sparseml.keras.optim.modifier_pruning
self._sparsity_scheduler = self._create_sparsity_scheduler(steps_per_epoch)
# Clone model and additional set up
- cloned_model = tensorflow.keras.models.clone_model(
+ cloned_model = keras.models.clone_model(
model,
input_tensors,
clone_function=self._clone_layer,
@@ -1100,7 +1099,7 @@ Source code for sparseml.keras.optim.modifier_pruning
def prunable_layers(self):
return self._masked_layers
-[docs] def finalize(self, model: tensorflow.keras.Model):
+[docs] def finalize(self, model: keras.Model):
"""
Remove extra information related to the modifier from the model that is
not necessary for exporting
diff --git a/sparseml/_modules/sparseml/keras/utils/callbacks.html b/sparseml/_modules/sparseml/keras/utils/callbacks.html
index 700944f89eb..32e80ab6255 100644
--- a/sparseml/_modules/sparseml/keras/utils/callbacks.html
+++ b/sparseml/_modules/sparseml/keras/utils/callbacks.html
@@ -195,9 +195,9 @@ Source code for sparseml.keras.utils.callbacks
from typing import List, Union
-import tensorflow
-from tensorflow import Tensor, keras
+from tensorflow import Tensor
+from sparseml.keras.utils.compat import keras
from sparseml.keras.utils.logger import KerasLogger, LoggingMode
@@ -381,7 +381,7 @@ Source code for sparseml.keras.utils.callbacks
:param logs: dictionary of logs (see Keras Callback doc)
"""
super().on_train_begin(logs)
- self._step = tensorflow.keras.backend.get_value(self._start_step)
+ self._step = keras.backend.get_value(self._start_step)
[docs] def on_epoch_end(self, epoch, logs=None):
"""
@@ -410,7 +410,10 @@ Source code for sparseml.keras.utils.callbacks
return
for logger in self._loggers:
assert logger.mode == LoggingMode.TRAIN
- if logger.update_freq == "batch" or self._step % logger.update_freq == 0:
+ if logger.update_freq == "batch" or (
+ isinstance(logger.update_freq, int)
+ and self._step % logger.update_freq == 0
+ ):
for tag, value in logs.items():
logger.log_scalar("batch_{}".format(tag), value, step=self._step)
diff --git a/sparseml/_modules/sparseml/keras/utils/compat.html b/sparseml/_modules/sparseml/keras/utils/compat.html
new file mode 100644
index 00000000000..1e17fa3fc9c
--- /dev/null
+++ b/sparseml/_modules/sparseml/keras/utils/compat.html
@@ -0,0 +1,272 @@
+ sparseml.keras.utils.compat — SparseML 0.1.0 documentation
+ Source code for sparseml.keras.utils.compat
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+ import keras as native_keras
+except ModuleNotFoundError:
+ native_keras = None
+
+import tensorflow
+
+
+__all__ = [
+ "assign",
+ "keras",
+]
+
+
+keras = native_keras if native_keras is not None else tensorflow.keras
+
+
+[docs]def assign(lhs, rhs, name=None):
+ if hasattr(tensorflow, "assign"):
+ return tensorflow.assign(lhs, rhs, name=name)
+ else:
+ return lhs.assign(rhs, name=name)
\ No newline at end of file
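A minimal sketch of how the new compat shim is consumed by the rest of the Keras integration, assuming either native keras or tensorflow.keras is installed:

# Hedged sketch: downstream modules import keras via the compat shim instead of tf.keras.
import tensorflow
from sparseml.keras.utils.compat import assign, keras

dense = keras.layers.Dense(16)    # native keras if installed, else tensorflow.keras
var = tensorflow.Variable([1.0, 2.0])
assign(var, [3.0, 4.0])           # tf.assign on TF1 graphs, Variable.assign on TF2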
diff --git a/sparseml/_modules/sparseml/keras/utils/exporter.html b/sparseml/_modules/sparseml/keras/utils/exporter.html
index 22c38071af1..5398f310df5 100644
--- a/sparseml/_modules/sparseml/keras/utils/exporter.html
+++ b/sparseml/_modules/sparseml/keras/utils/exporter.html
@@ -196,8 +196,7 @@ Source code for sparseml.keras.utils.exporter
import os
from typing import Any, List
-from tensorflow.keras import Model
-
+from sparseml.keras.utils import keras
from sparseml.utils import clean_path, create_parent_dirs, tensors_export
@@ -226,7 +225,7 @@ Source code for sparseml.keras.utils.exporter
def __init__(
self,
- model: Model,
+ model: keras.Model,
output_dir: str,
):
self._model = model
@@ -238,6 +237,7 @@ Source code for sparseml.keras.utils.exporter
opset: int = DEFAULT_ONNX_OPSET,
doc_string: str = "",
debug_mode: bool = True,
+ raise_on_tf_support: bool = True,
**kwargs,
):
"""
@@ -252,6 +252,16 @@ Source code for sparseml.keras.utils.exporter
if keras2onnx_import_error is not None:
raise keras2onnx_import_error
+ if raise_on_tf_support:
+ import tensorflow
+ v = tensorflow.__version__
+ if v >= "2.3.0":
+ raise ValueError(
+ f"Tensorflow version {v} is greater than the currently supported "
+ "version for keras2onnx. Please downgrade the Tensorflow <2.3.0 "
+ "or set raise_on_tf_support to False to continue."
+ )
+
model_name = self._model.name or name.split(".onnx")[0]
onnx_model = keras2onnx.convert_keras(
self._model,
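A short usage sketch for the new raise_on_tf_support flag; the ModelExporter class and export_onnx method names are assumptions here, since the enclosing definitions are elided from the hunk.

# Hedged sketch (class/method names assumed): bypass the TF version guard explicitly.
from sparseml.keras.utils.exporter import ModelExporter

exporter = ModelExporter(model, output_dir="keras_onnx_export")  # model: a built keras.Model
exporter.export_onnx(name="model.onnx", raise_on_tf_support=False)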
diff --git a/sparseml/_modules/sparseml/keras/utils/model.html b/sparseml/_modules/sparseml/keras/utils/model.html
index d1039156644..a8e3bc15535 100644
--- a/sparseml/_modules/sparseml/keras/utils/model.html
+++ b/sparseml/_modules/sparseml/keras/utils/model.html
@@ -193,8 +193,9 @@ Source code for sparseml.keras.utils.model
Utils for Keras model
"""
-import tensorflow as tf
-from tensorflow import keras
+import tensorflow
+
+from sparseml.keras.utils import keras
__all__ = ["sparsity"]
@@ -207,7 +208,7 @@ Source code for sparseml.keras.utils.model
:param model: a Keras model
:return: (1) model sparsity, (2) dictionary of layer sparsity
"""
- zero = tf.constant(0, dtype=tf.float32)
+ zero = tensorflow.constant(0, dtype=tensorflow.float32)
model_weight_size = 0
model_zeros = 0
sparsity_dict = {}
@@ -216,10 +217,12 @@ Source code for sparseml.keras.utils.model
layer_sparsity_dict = {}
for i, weight in enumerate(layer.trainable_weights):
- mask = tf.cast(tf.equal(weight, zero), tf.uint8)
+ mask = tensorflow.cast(tensorflow.equal(weight, zero), tensorflow.uint8)
- weight_size = tf.size(weight)
- zeros = tf.cast(tf.math.count_nonzero(mask), tf.int32)
+ weight_size = tensorflow.size(weight)
+ zeros = tensorflow.cast(
+ tensorflow.math.count_nonzero(mask), tensorflow.int32
+ )
layer_sparsity_dict[weight.name] = zeros / weight_size
model_weight_size += weight_size
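A small sketch of the sparsity() helper on a freshly built model, which is expected to report roughly zero sparsity before any pruning:

# Hedged sketch: measure overall and per-layer sparsity of a Keras model.
from sparseml.keras.utils import keras
from sparseml.keras.utils.model import sparsity

model = keras.Sequential(
    [keras.layers.Dense(16, input_shape=(8,)), keras.layers.Dense(4)]
)
model_sparsity, layer_sparsities = sparsity(model)
print(float(model_sparsity), list(layer_sparsities.keys()))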
diff --git a/sparseml/_modules/sparseml/onnx/utils/graph_editor.html b/sparseml/_modules/sparseml/onnx/utils/graph_editor.html
index f3c9ae4a055..60f19ded0c6 100644
--- a/sparseml/_modules/sparseml/onnx/utils/graph_editor.html
+++ b/sparseml/_modules/sparseml/onnx/utils/graph_editor.html
@@ -193,16 +193,18 @@ Source code for sparseml.onnx.utils.graph_editor
Helper functions to edit ONNX Graphs.
"""
-from typing import Iterable, List, Union
+from collections import defaultdict
+from typing import Iterable, List, Optional, Union
import numpy
import onnx
-from onnx import ModelProto, NodeProto, numpy_helper
+from onnx import ModelProto, NodeProto, TensorProto, numpy_helper
from sparseml.onnx.utils.helpers import get_node_params
__all__ = [
+ "ONNXGraph",
"update_model_param",
"swap_node_output",
"remove_node_and_params_from_graph",
@@ -213,6 +215,177 @@ Source code for sparseml.onnx.utils.graph_editor
]
+[docs]class ONNXGraph(object):
+ """
+ Class for quick look-up of ONNX graph nodes and initializers. If graph state
+ changes outside of ONNXGraph class functions, update() should be called.
+
+ :param model: the ONNX graph to represent
+ """
+
+ def __init__(self, model: ModelProto):
+ self._model = model
+ self._output_id_to_node = {}
+ self._input_id_to_nodes = defaultdict(list)
+ self._name_to_initializer = {}
+
+ self.update()
+
+[docs] def update(self, model: Optional[ModelProto] = None):
+ """
+ Update the graph state based on the model this graph represents or
+ the given model.
+
+        :param model: model to represent; defaults to the currently loaded model state
+ """
+ self._model = model or self._model
+
+ # nodes
+ self._output_id_to_node = {}
+ self._input_id_to_nodes = defaultdict(list)
+ for node in self._model.graph.node:
+ self._store_node_edges(node)
+
+ # initializers
+ self._name_to_initializer = {
+ init.name: init for init in self._model.graph.initializer
+ }
+
+[docs] def get_init_by_name(self, name: str) -> Optional[TensorProto]:
+ """
+ :param name: name of initializer
+ :return: tensor of initializer with given name, returns None if the name does
+ not exist in the cached graph
+ """
+ return self._name_to_initializer.get(name)
+
+[docs] def get_node_parents(
+ self, node: NodeProto
+ ) -> List[Union[NodeProto, TensorProto, None]]:
+ """
+ :param node: node to get the input objects for
+        :return: input nodes or tensors of this node, in order; if an input does
+            not exist, None is returned in its place
+ """
+ inputs = []
+ for input_id in node.input:
+ inp = None
+ if input_id in self._output_id_to_node:
+ inp = self._output_id_to_node[input_id]
+ elif input_id in self._name_to_initializer:
+ inp = self._name_to_initializer[input_id]
+ inputs.append(inp)
+ return inputs
+
+[docs] def get_node_children(self, node: NodeProto) -> List[NodeProto]:
+ """
+        :param node: the node to get the child nodes of
+ :return: list of nodes that include this node as an output
+ """
+ children = []
+ for output_id in node.output:
+ children.extend(self._input_id_to_nodes[output_id])
+ return children
+
+[docs] def add_node(self, node: NodeProto):
+ """
+ Adds the given node to the model and graph state
+
+ :param node: node to add to the model
+ """
+ self._model.graph.node.append(node)
+ self._store_node_edges(node)
+
+[docs] def update_node_input(
+ self, node: NodeProto, input_id: str, input_idx: Optional[int] = None
+ ):
+ """
+ :param node: node to update the inputs of
+ :param input_id: new input_id to attach to the node
+ :param input_idx: optional index of the node input list to update,
+ if none is given, the new input id will be appended to the input list
+ """
+ if input_idx is not None:
+ if node in self._input_id_to_nodes[node.input[input_idx]]:
+ self._input_id_to_nodes[node.input[input_idx]].remove(node)
+ node.input[input_idx] = input_id
+ else:
+ node.input.append(input_id)
+ self._input_id_to_nodes[input_id].append(node)
+
+[docs] def delete_node(self, node: NodeProto):
+ """
+ deletes the given node from the graph
+
+ :param node: node to delete
+ """
+ self._model.graph.node.remove(node)
+ self._delete_node_edges(node)
+
+[docs] def delete_nodes(self, nodes: List[NodeProto]):
+ """
+ deletes the given nodes from the graph
+ :param nodes: list of nodes to delete
+ """
+        node_output_ids_to_delete = {node.output[0] for node in nodes}
+        nodes_to_keep = []
+        for node in self._model.graph.node:
+            if node.output[0] in node_output_ids_to_delete:
+ self._delete_node_edges(node)
+ else:
+ nodes_to_keep.append(node)
+ self._model.graph.ClearField("node")
+ self._model.graph.node.extend(nodes_to_keep)
+
+[docs] def delete_initializers(self, initializers: List[Union[str, TensorProto]]):
+ """
+ deletes the given initializers from the model
+
+ :param initializers: list of initializers or initializer names to delete
+ """
+ inits_to_delete = {
+ init if isinstance(init, str) else init.name for init in initializers
+ }
+ inits_to_keep = []
+ for init in self._model.graph.initializer:
+ if init.name in inits_to_delete:
+ # keep edge reference if nodes in the graph still point to the
+ # initializer name
+ if not self._input_id_to_nodes[init.name]:
+ del self._input_id_to_nodes[init.name]
+ del self._name_to_initializer[init.name]
+ else:
+ inits_to_keep.append(init)
+ self._model.graph.ClearField("initializer")
+ self._model.graph.initializer.extend(inits_to_keep)
+
+[docs] def delete_unused_initializers(self):
+ """
+ deletes tensors in the initializer list that are not listed as inputs to any node
+ in the current graph state
+ """
+ self.delete_initializers(
+ [
+ init
+ for init in self._model.graph.initializer
+ if not self._input_id_to_nodes[init.name]
+ ]
+ ) # delete inits that have no edge
+
+ def _store_node_edges(self, node: NodeProto):
+ for output_id in node.output:
+ self._output_id_to_node[output_id] = node
+ for input_id in node.input:
+ self._input_id_to_nodes[input_id].append(node)
+
+ def _delete_node_edges(self, node: NodeProto):
+ # remove node edges from cache
+ for output_id in node.output:
+ del self._output_id_to_node[output_id]
+ for input_id in node.input:
+ self._input_id_to_nodes[input_id].remove(node)
+
+
[docs]def update_model_param(
model: ModelProto,
param_name: str,
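A usage sketch for the new ONNXGraph helper added above; the model path is illustrative.

# Hedged sketch: cached node/initializer look-ups on a loaded ONNX model.
import onnx
from sparseml.onnx.utils.graph_editor import ONNXGraph

model = onnx.load("model.onnx")          # illustrative path
graph = ONNXGraph(model)

node = model.graph.node[0]
parents = graph.get_node_parents(node)   # NodeProto/TensorProto inputs, None where missing
children = graph.get_node_children(node) # nodes consuming this node's outputs

graph.delete_unused_initializers()       # drop initializers no node references
graph.update()                           # re-sync caches after edits made outside the class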
diff --git a/sparseml/_modules/sparseml/onnx/utils/graph_optimizer.html b/sparseml/_modules/sparseml/onnx/utils/graph_optimizer.html
index a4251a3b28a..c91063a3097 100644
--- a/sparseml/_modules/sparseml/onnx/utils/graph_optimizer.html
+++ b/sparseml/_modules/sparseml/onnx/utils/graph_optimizer.html
@@ -200,6 +200,7 @@ Source code for sparseml.onnx.utils.graph_optimizer
import onnx
from sparseml.onnx.utils.graph_editor import (
+ ONNXGraph,
remove_node_and_params_from_graph,
swap_node_output,
update_model_param,
@@ -217,6 +218,7 @@ Source code for sparseml.onnx.utils.graph_optimizer
__all__ = [
"fold_conv_bns",
"quantize_resnet_identity_add_inputs",
+ "quantized_residual_add_optim",
]
@@ -317,7 +319,7 @@ Source code for sparseml.onnx.utils.graph_optimizer
or add op and a quantize -> de-quantize block that takes the same relu as input.
Performs this optimization in place.
- :param quantized_model: A loaded quantized model to performed this optimization on
+ :param quantized_model: A loaded quantized model to perform this optimization on
:return: True if an in-place optimization was made
"""
@@ -369,6 +371,100 @@ Source code for sparseml.onnx.utils.graph_optimizer
optimization_made = True
return optimization_made
+
+
+[docs]def quantized_residual_add_optim(quantized_model: onnx.ModelProto) -> bool:
+ """
+ This optimization adds a quant/dequant block to the identity branch of a
+ residual whose non-identity branch is quantized. This enables the add at the
+ end of the residual to be fused at runtime.
+
+    The function matches any node that has exactly two child nodes - one add node
+    and one quantize node whose branch eventually leads to that add node.
+
+ :param quantized_model: A loaded quantized model to perform this optimization on
+ :return: True if an in-place optimization was made
+ """
+ graph = ONNXGraph(quantized_model)
+ optimization_made = False
+ for node in quantized_model.graph.node:
+ children_nodes = graph.get_node_children(node)
+ if len(children_nodes) != 2:
+ continue
+
+ add_node = [node for node in children_nodes if node.op_type == "Add"]
+ quant_node = [
+ node for node in children_nodes if node.op_type == "QuantizeLinear"
+ ]
+ if not add_node or not quant_node:
+ continue
+ add_node = add_node[0]
+ quant_node = quant_node[0]
+
+ # verify that quant_node eventually leads to add_node
+ curr_node = [quant_node]
+ iter = 0
+ max_iter = 20 # avoid cycles
+ while curr_node and curr_node[0] != add_node and iter < max_iter:
+ curr_node = graph.get_node_children(curr_node[0])
+ iter += 1
+ if curr_node[0] != add_node:
+ continue
+
+ # create de-quantize node for identity
+ dequant_node = _make_dequant_node_for_quant(quant_node)
+
+ # update graph
+ identity_edge_idx = 0 if add_node.input[0] == node.output[0] else 1
+ graph.add_node(dequant_node)
+ graph.update_node_input(add_node, dequant_node.output[0], identity_edge_idx)
+ optimization_made = True
+
+        # if any of the add node's children are a quantize op while others are not,
+        # add a quant/dequant block to the non-quantized paths to allow for fusion
+        # of the add
+ add_node_children = graph.get_node_children(add_node)
+ add_node_quant_child_idx = [
+ idx
+ for idx, node in enumerate(add_node_children)
+ if node.op_type == "QuantizeLinear"
+ ]
+ if not add_node_quant_child_idx or all(
+ n.op_type == "Add" or n.op_type == "QuantizeLinear"
+ for n in add_node_children
+ ):
+ # no quant child node, or all child nodes are quant/add nodes
+ continue
+
+ # make dequant pair node for quant child and add to graph
+ add_node_dequant_child = _make_dequant_node_for_quant(
+ add_node_children[add_node_quant_child_idx[0]]
+ )
+ graph.add_node(add_node_dequant_child)
+
+ # update all non quant node children to take the quant/dequant block as input
+ for add_child_node in add_node_children:
+ if add_child_node.op_type == "QuantizeLinear":
+ continue
+ add_node_id_idx = [
+ idx
+ for idx, output_id in enumerate(add_child_node.input)
+ if output_id == add_node.output[0]
+ ][0]
+ graph.update_node_input(
+ add_child_node, add_node_dequant_child.output[0], add_node_id_idx
+ )
+
+ return optimization_made
+
+
+def _make_dequant_node_for_quant(quant_node: onnx.NodeProto) -> onnx.NodeProto:
+ return onnx.helper.make_node(
+ "DequantizeLinear",
+ [quant_node.output[0]] + quant_node.input[1:], # new inputs
+ [f"{quant_node.output[0]}_dequantized"], # output name
+ f"{quant_node.name or quant_node.output[0]}_dequantized", # node name
+ )
diff --git a/sparseml/_modules/sparseml/onnx/utils/helpers.html b/sparseml/_modules/sparseml/onnx/utils/helpers.html
index 94b9237e393..b3dcca72e65 100644
--- a/sparseml/_modules/sparseml/onnx/utils/helpers.html
+++ b/sparseml/_modules/sparseml/onnx/utils/helpers.html
@@ -247,6 +247,8 @@ Source code for sparseml.onnx.utils.helpers
"get_kernel_shape",
"calculate_flops",
"get_quantize_parent_for_dequantize_node",
+ "get_tensor_dim_shape",
+ "set_tensor_dim_shape",
]
@@ -1366,6 +1368,26 @@ Source code for sparseml.onnx.utils.helpers
input_nodes = get_node_input_nodes(quantized_model, curr_node)
curr_node = input_nodes[0] if input_nodes else None
return curr_node
+
+
+[docs]def get_tensor_dim_shape(tensor: onnx.TensorProto, dim: int) -> int:
+ """
+ :param tensor: ONNX tensor to get the shape of a dimension of
+ :param dim: dimension index of the tensor to get the shape of
+ :return: shape of the tensor at the given dimension
+ """
+ return tensor.type.tensor_type.shape.dim[dim].dim_value
+
+
+[docs]def set_tensor_dim_shape(tensor: onnx.TensorProto, dim: int, value: int):
+ """
+ Sets the shape of the tensor at the given dimension to the given value
+
+ :param tensor: ONNX tensor to modify the shape of
+ :param dim: dimension index of the tensor to modify the shape of
+ :param value: new shape for the given dimension
+ """
+ tensor.type.tensor_type.shape.dim[dim].dim_value = value
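A small sketch of the new shape helpers, for example forcing a static batch size on a model input; the model path is illustrative.

# Hedged sketch: read and override a model input's batch dimension.
import onnx
from sparseml.onnx.utils.helpers import get_tensor_dim_shape, set_tensor_dim_shape

model = onnx.load("model.onnx")                  # illustrative path
model_input = model.graph.input[0]
original_batch = get_tensor_dim_shape(model_input, 0)
set_tensor_dim_shape(model_input, 0, 1)          # pin the batch dimension to 1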
diff --git a/sparseml/_modules/sparseml/optim/modifier.html b/sparseml/_modules/sparseml/optim/modifier.html
index 7eb93702d1f..d156290dad5 100644
--- a/sparseml/_modules/sparseml/optim/modifier.html
+++ b/sparseml/_modules/sparseml/optim/modifier.html
@@ -486,8 +486,8 @@ Source code for sparseml.optim.modifier
)
raise ValueError(
"Invalid modifier location. Grouped modifiers in recipes must "
- "be listed in lists with 'modifiers' in its name. A modifier of "
- f"type {modifier_type} was found in recipe list {name}"
+ "be listed in lists with 'modifiers' in its name. A modifier "
+ f"of type {modifier_type} was found in recipe list {name}"
)
return modifiers
diff --git a/sparseml/_modules/sparseml/pytorch/models/detection/yolo_v3.html b/sparseml/_modules/sparseml/pytorch/models/detection/yolo_v3.html
index 8d3b4981a5b..99fd003fcea 100644
--- a/sparseml/_modules/sparseml/pytorch/models/detection/yolo_v3.html
+++ b/sparseml/_modules/sparseml/pytorch/models/detection/yolo_v3.html
@@ -354,7 +354,7 @@ Source code for sparseml.pytorch.models.detection.yolo_v3
_init_conv(self.conv)
# smart bias initialization
- b = self.conv.bias.view(3, -1)
+ b = self.conv.bias.view(3, -1).detach()
b[:, 4] += math.log(8 / 640 ** 2) # 8 objects per 640 image
b[:, 5:] += math.log(0.6 / (self.num_classes - 0.99))
self.conv.bias = Parameter(b.view(-1), requires_grad=True)
diff --git a/sparseml/_modules/sparseml/pytorch/nn/activations.html b/sparseml/_modules/sparseml/pytorch/nn/activations.html
index b1af7ca3b29..62df66cdf96 100644
--- a/sparseml/_modules/sparseml/pytorch/nn/activations.html
+++ b/sparseml/_modules/sparseml/pytorch/nn/activations.html
@@ -202,6 +202,12 @@ Source code for sparseml.pytorch.nn.activations
from torch.nn import ReLU6 as TReLU6
+try:
+ from torch.nn import SiLU
+except ImportError:
+ SiLU = None
+
+
__all__ = [
"ReLU",
"ReLU6",
@@ -211,6 +217,7 @@ Source code for sparseml.pytorch.nn.activations
"hard_swish",
"create_activation",
"replace_activation",
+ "replace_activations",
"is_activation",
]
@@ -335,7 +342,7 @@ Source code for sparseml.pytorch.nn.activations
:param module: the module to replace the activation function in
:param name: the name of the layer to replace the activation for
:param act_type: the type of activation to replace with; options:
- [relu, relu6, prelu, lrelu, swish]
+ [relu, relu6, prelu, lrelu, swish, silu]
:param inplace: True to create the activation as an inplace, False otherwise
:param num_channels: The number of channels to create the activation for
:param kwargs: Additional kwargs to pass to the activation constructor
@@ -362,6 +369,42 @@ Source code for sparseml.pytorch.nn.activations
return act
+[docs]def replace_activations(
+ module: Module,
+ act_type: str,
+ inplace: bool = False,
+ num_channels: Union[int, None] = None,
+ **kwargs,
+) -> Module:
+ """
+ General function to replace all activation functions in a Module
+ with a new one.
+
+ :param module: the module to replace the activation function in
+ :param act_type: the type of activation to replace with; options:
+ [relu, relu6, prelu, lrelu, swish, silu]
+ :param inplace: True to create the activation as an inplace, False otherwise
+ :param num_channels: The number of channels to create the activation for
+ :param kwargs: Additional kwargs to pass to the activation constructor
+ :return: the updated module
+ """
+ if is_activation(module):
+ return create_activation(
+ act_type, inplace=inplace, num_channels=num_channels, **kwargs
+ )
+
+ for child_name, child_module in module.named_children():
+ setattr(
+ module,
+ child_name,
+ replace_activations(
+ child_module, act_type, inplace, num_channels, **kwargs
+ ),
+ )
+
+ return module
+
+
[docs]def create_activation(
act_type: str, inplace: bool, num_channels: int, **kwargs
) -> Module:
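A minimal sketch of the new replace_activations helper, swapping every ReLU in a small module tree for SiLU (requires a torch build that ships torch.nn.SiLU):

# Hedged sketch: replace all activations in a module tree with SiLU.
import torch.nn as nn
from sparseml.pytorch.nn.activations import replace_activations

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3), nn.ReLU())
model = replace_activations(model, "silu")
print(model)  # both ReLU modules are now SiLU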
@@ -369,7 +412,7 @@ Source code for sparseml.pytorch.nn.activations
Create an activation function using the given parameters.
:param act_type: the type of activation to replace with; options:
- [relu, relu6, prelu, lrelu, swish, hardswish]
+ [relu, relu6, prelu, lrelu, swish, hardswish, silu]
:param inplace: True to create the activation as an inplace, False otherwise
:param num_channels: The number of channels to create the activation for
:param kwargs: Additional kwargs to pass to the activation constructor
@@ -395,6 +438,9 @@ Source code for sparseml.pytorch.nn.activations
if act_type == "hardswish":
return Hardswish(num_channels=num_channels, inplace=inplace)
+ if act_type == "silu":
+ return SiLU(**kwargs)
+
raise ValueError("unknown act_type given of {}".format(act_type))
@@ -413,6 +459,7 @@ Source code for sparseml.pytorch.nn.activations
or isinstance(module, LeakyReLU)
or isinstance(module, Swish)
or isinstance(module, Hardswish)
+ or (SiLU is not None and isinstance(module, SiLU))
)
diff --git a/sparseml/_modules/sparseml/pytorch/optim/modifier_quantization.html b/sparseml/_modules/sparseml/pytorch/optim/modifier_quantization.html
index 66d42ca0642..cfeb1f432c0 100644
--- a/sparseml/_modules/sparseml/pytorch/optim/modifier_quantization.html
+++ b/sparseml/_modules/sparseml/pytorch/optim/modifier_quantization.html
@@ -211,7 +211,7 @@ Source code for sparseml.pytorch.optim.modifier_quantization
from sparseml.optim import ModifierProp
from sparseml.pytorch.optim.modifier import PyTorchModifierYAML, ScheduledModifier
-from sparseml.pytorch.optim.quantization import (
+from sparseml.pytorch.utils.quantization import (
add_quant_dequant,
fuse_module_conv_bn_relus,
get_qat_qconfig,
diff --git a/sparseml/_modules/sparseml/pytorch/utils/callbacks.html b/sparseml/_modules/sparseml/pytorch/utils/callbacks.html
new file mode 100644
index 00000000000..ce222f1f868
--- /dev/null
+++ b/sparseml/_modules/sparseml/pytorch/utils/callbacks.html
@@ -0,0 +1,345 @@
+ sparseml.pytorch.utils.callbacks — SparseML 0.1.0 documentation
+ Source code for sparseml.pytorch.utils.callbacks
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Iterable
+
+import numpy
+from torch import Tensor
+
+from sparseml.utils.datasets import (
+ CIFAR_10_CLASSES,
+ COCO_CLASSES,
+ COCO_CLASSES_80,
+ IMAGENET_CLASSES,
+ IMAGENETTE_CLASSES,
+ VOC_CLASSES,
+)
+
+
+__all__ = [
+ "apply_one_hot_label_mapping",
+ "cifar10_label_mapping",
+ "imagenette_label_mapping",
+ "imagenet_label_mapping",
+ "mnist_label_mapping",
+ "coco_yolo_2017_mapping",
+ "coco_mapping",
+]
+
+##############################
+#
+# Callbacks for mapping labels
+#
+##############################
+
+
+[docs]def apply_one_hot_label_mapping(labels: Tensor, class_names: Dict[Any, str]):
+ def _apply_label(label: int):
+ one_hot_label = [0] * len(class_names.keys())
+ one_hot_label[label] = 1
+ return one_hot_label
+
+ arr = [
+ numpy.array([_apply_label(label) for label in labels]),
+ numpy.array([[val for _, val in class_names.items()]] * len(labels)),
+ ]
+
+ return arr
+
+
+def apply_box_label_mapping(labels: Iterable[Tensor], class_names: Dict[Any, str]):
+ class_names = [
+ class_names[i] if i in class_names else ""
+ for i in range(max(class_names.keys()) + 1)
+ ]
+ return [
+ labels[0],
+ labels[1],
+ [numpy.array([class_names] * labels[0].shape[0])],
+ ]
+
+
+[docs]def cifar10_label_mapping(labels: Tensor):
+ return apply_one_hot_label_mapping(labels, CIFAR_10_CLASSES)
+
+
+[docs]def imagenette_label_mapping(labels: Tensor):
+ return apply_one_hot_label_mapping(
+ labels,
+ IMAGENETTE_CLASSES,
+ )
+
+
+[docs]def imagenet_label_mapping(labels: Tensor):
+ return apply_one_hot_label_mapping(
+ labels,
+ IMAGENET_CLASSES,
+ )
+
+
+[docs]def mnist_label_mapping(labels: Tensor):
+ return apply_one_hot_label_mapping(labels, {idx: str(idx) for idx in range(10)})
+
+
+[docs]def coco_yolo_2017_mapping(labels: Iterable[Tensor]):
+ class_names = [val for _, val in COCO_CLASSES_80.items()]
+
+ return [
+ labels[0],
+ [numpy.array([class_names] * labels[0].shape[0])],
+ ]
+
+
+[docs]def coco_mapping(labels: Iterable[Tensor]):
+ return apply_box_label_mapping(labels, COCO_CLASSES)
+
+
+def voc_mapping(labels: Iterable[Tensor]):
+ return apply_box_label_mapping(labels, VOC_CLASSES)
+
+
\ No newline at end of file
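As a rough usage sketch of the new label-mapping callbacks, assuming CIFAR_10_CLASSES maps the ten CIFAR-10 class ids to their names; the example labels are illustrative only.

import torch
from sparseml.pytorch.utils.callbacks import cifar10_label_mapping

labels = torch.tensor([3, 0, 7])  # integer class ids from a CIFAR-10 loader
one_hot, class_names = cifar10_label_mapping(labels)
# one_hot: numpy array of shape (3, 10) with one-hot rows
# class_names: numpy array of shape (3, 10), the class-name list repeated per sample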
diff --git a/sparseml/_modules/sparseml/pytorch/utils/exporter.html b/sparseml/_modules/sparseml/pytorch/utils/exporter.html
index f5421e57774..3e532332863 100644
--- a/sparseml/_modules/sparseml/pytorch/utils/exporter.html
+++ b/sparseml/_modules/sparseml/pytorch/utils/exporter.html
@@ -193,9 +193,10 @@ Source code for sparseml.pytorch.utils.exporter
Export PyTorch models to the local device
"""
+import logging
import os
from copy import deepcopy
-from typing import Any, Iterable, List
+from typing import Any, Callable, Iterable, List, Optional, Tuple
import numpy
import onnx
@@ -204,13 +205,20 @@ Source code for sparseml.pytorch.utils.exporter
from torch import Tensor
from torch.nn import Module
from torch.optim.optimizer import Optimizer
+from torch.utils.data import DataLoader
from sparseml.pytorch.utils.helpers import (
tensors_export,
tensors_module_forward,
tensors_to_device,
)
-from sparseml.pytorch.utils.model import is_parallel_model, save_model
+from sparseml.pytorch.utils.model import (
+ is_parallel_model,
+ save_model,
+ script_model,
+ trace_model,
+)
+from sparseml.pytorch.utils.quantization import quantize_torch_qat_export
from sparseml.utils import clean_path, create_parent_dirs
@@ -218,6 +226,7 @@ Source code for sparseml.pytorch.utils.exporter
DEFAULT_ONNX_OPSET = 9 if torch.__version__ < "1.3" else 11
+_LOGGER = logging.getLogger(__name__)
[docs]class ModuleExporter(object):
@@ -240,12 +249,92 @@ Source code for sparseml.pytorch.utils.exporter
self._module = deepcopy(module).to("cpu").eval()
self._output_dir = clean_path(output_dir)
+[docs] def export_to_zoo(
+ self,
+ dataloader: DataLoader,
+ original_dataloader: Optional[DataLoader] = None,
+ shuffle: bool = False,
+ max_samples: int = 20,
+ data_split_cb: Optional[Callable[[Any], Tuple[Any, Any]]] = None,
+ label_mapping_cb: Optional[Callable[[Any], Any]] = None,
+ trace_script: bool = False,
+ fail_on_torchscript_failure: bool = True,
+ export_entire_model: bool = False,
+ ):
+ """
+ Creates and exports all related content of the module, including
+ sample data, ONNX, PyTorch, and TorchScript artifacts.
+
+ :param dataloader: DataLoader used to generate sample data
+ :param original_dataloader: Optional dataloader to obtain the untransformed
+ image.
+ :param shuffle: Whether to shuffle sample data
+ :param max_samples: Max number of sample data to create
+ :param data_split_cb: Optional callback function to split a data sample into
+ a (features, labels) tuple. If not provided, the dataloader is assumed
+ to return a (features, labels) tuple.
+ :param label_mapping_cb: Optional callback function to map dataset labels to
+ other formats.
+ :param dataset_wrapper: Wrapper function for the dataset to add original data
+ to each sample. If set to None, defaults to the
+ 'iter_dataset_with_orig_wrapper' function.
+ :param trace_script: If True, creates the TorchScript model via tracing.
+ Otherwise, creates it via scripting.
+ :param fail_on_torchscript_failure: If True, raises if the model cannot be
+ exported to TorchScript.
+ :param export_entire_model: If True, exports the entire model instead of just
+ the state_dict.
+ """
+ sample_batches = []
+ sample_labels = []
+ sample_originals = None
+ if original_dataloader is not None:
+ sample_originals = []
+ for originals in original_dataloader:
+ sample_originals.append(originals)
+ if len(sample_originals) == max_samples:
+ break
+
+ for sample in dataloader:
+ if data_split_cb is not None:
+ features, labels = data_split_cb(sample)
+ else:
+ features, labels = sample
+ if label_mapping_cb:
+ labels = label_mapping_cb(labels)
+
+ sample_batches.append(features)
+ sample_labels.append(labels)
+ if len(sample_batches) == max_samples:
+ break
+
+ self.export_onnx(sample_batch=sample_batches[0])
+ self.export_pytorch(export_entire_model=export_entire_model)
+ try:
+ if trace_script:
+ self.export_torchscript(sample_batch=sample_batches[0])
+ else:
+ self.export_torchscript()
+ except Exception as e:
+ if fail_on_torchscript_failure:
+ raise e
+ else:
+ _LOGGER.warning(
+ f"Unable to create TorchScript file. The following error occurred: {e}"
+ )
+
+ self.export_samples(
+ sample_batches,
+ sample_labels=sample_labels,
+ sample_originals=sample_originals,
+ )
+
[docs] def export_onnx(
self,
sample_batch: Any,
name: str = "model.onnx",
opset: int = DEFAULT_ONNX_OPSET,
disable_bn_fusing: bool = True,
+ convert_qat: bool = False,
):
"""
Export an onnx file for the current module and for a sample batch.
@@ -263,6 +352,10 @@ Source code for sparseml.pytorch.utils.exporter
sensitivity analyses of the exported graph. Additionally, the DeepSparse
inference engine, and other engines, perform batch norm fusing at model
compilation.
+ :param convert_qat: if True and quantization aware training is detected in
+ the module being exported, the resulting QAT ONNX model will be converted
+ to a fully quantized ONNX model using `quantize_torch_qat_export`. Default
+ is False.
"""
sample_batch = tensors_to_device(sample_batch, "cpu")
onnx_path = os.path.join(self._output_dir, name)
@@ -329,7 +422,32 @@ Source code for sparseml.pytorch.utils.exporter
if batch_norms_wrapped:
onnx_model = onnx.load(onnx_path)
_delete_trivial_onnx_adds(onnx_model)
- onnx.save(onnx_model, onnx_path)
+ onnx.save(onnx_model, onnx_path)
+
+ if convert_qat and is_quant_module:
+ # overwrite exported model with fully quantized version
+ quantize_torch_qat_export(model=onnx_path, output_file_path=onnx_path)
+
+[docs] def export_torchscript(
+ self,
+ name: str = "model.pts",
+ sample_batch: Optional[Any] = None,
+ ):
+ """
+ Export the module as TorchScript to a .pts file within the framework
+ directory. If a sample batch is provided, the TorchScript model is created
+ via tracing; otherwise it is created via scripting.
+
+ :param name: name of the torchscript file to save
+ :param sample_batch: If provided, will create torchscript model via tracing
+ using the sample_batch
+ """
+ path = os.path.join(self._output_dir, "framework", name)
+ create_parent_dirs(path)
+ if sample_batch:
+ trace_model(path, self._module, sample_batch)
+ else:
+ script_model(path, self._module)
[docs] def export_pytorch(
self,
@@ -338,6 +456,7 @@ Source code for sparseml.pytorch.utils.exporter
name: str = "model.pth",
use_zipfile_serialization_if_available: bool = True,
include_modifiers: bool = False,
+ export_entire_model: bool = False,
):
"""
Export the pytorch state dicts into pth file within a
@@ -351,25 +470,31 @@ Source code for sparseml.pytorch.utils.exporter
:param include_modifiers: if True, and a ScheduledOptimizer is provided
as the optimizer, the associated ScheduledModifierManager and its
Modifiers will be exported under the 'manager' key. Default is False
+ :param export_entire_model: If True, exports the entire model instead of just the state_dict
"""
- pytorch_path = os.path.join(self._output_dir, "pytorch")
+ pytorch_path = os.path.join(self._output_dir, "framework")
pth_path = os.path.join(pytorch_path, name)
create_parent_dirs(pth_path)
- save_model(
- pth_path,
- self._module,
- optimizer,
- epoch,
- use_zipfile_serialization_if_available=(
- use_zipfile_serialization_if_available
- ),
- include_modifiers=include_modifiers,
- )
+
+ if export_entire_model:
+ torch.save(self._module, pth_path)
+ else:
+ save_model(
+ pth_path,
+ self._module,
+ optimizer,
+ epoch,
+ use_zipfile_serialization_if_available=(
+ use_zipfile_serialization_if_available
+ ),
+ include_modifiers=include_modifiers,
+ )
[docs] def export_samples(
self,
sample_batches: List[Any],
- sample_labels: List[Any] = None,
+ sample_labels: Optional[List[Any]] = None,
+ sample_originals: Optional[List[Any]] = None,
exp_counter: int = 0,
):
"""
@@ -382,14 +507,18 @@ Source code for sparseml.pytorch.utils.exporter
:param exp_counter: the counter to start exporting the tensor files at
"""
sample_batches = [tensors_to_device(batch, "cpu") for batch in sample_batches]
- inputs_dir = os.path.join(self._output_dir, "_sample-inputs")
- outputs_dir = os.path.join(self._output_dir, "_sample-outputs")
- labels_dir = os.path.join(self._output_dir, "_sample-labels")
+ inputs_dir = os.path.join(self._output_dir, "sample-inputs")
+ outputs_dir = os.path.join(self._output_dir, "sample-outputs")
+ labels_dir = os.path.join(self._output_dir, "sample-labels")
+ originals_dir = os.path.join(self._output_dir, "sample-originals")
with torch.no_grad():
- for batch, lab in zip(
+ for batch, lab, orig in zip(
sample_batches,
sample_labels if sample_labels else [None for _ in sample_batches],
+ sample_originals
+ if sample_originals
+ else [None for _ in sample_batches],
):
out = tensors_module_forward(batch, self._module)
@@ -418,6 +547,15 @@ Source code for sparseml.pytorch.utils.exporter
lab, labels_dir, "lab", counter=exp_counter, break_batch=True
)
+ if orig is not None:
+ tensors_export(
+ orig,
+ originals_dir,
+ "orig",
+ counter=exp_counter,
+ break_batch=True,
+ )
+
assert len(exported_input) == len(exported_output)
exp_counter += len(exported_input)
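Taken together, a hedged sketch of driving the new export_to_zoo entry point; the toy model, data, and output directory below are illustrative only. Per the changes above, this writes model.onnx, framework/model.pth, framework/model.pts, and the sample-inputs, sample-outputs, and sample-labels folders under output_dir.

import torch
from torch.utils.data import DataLoader
from sparseml.pytorch.utils.exporter import ModuleExporter

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
data = [(torch.randn(3, 32, 32), torch.tensor(0)) for _ in range(4)]
loader = DataLoader(data, batch_size=2)

exporter = ModuleExporter(model, output_dir="exported_model")
# exports ONNX, PyTorch weights, TorchScript, and up to max_samples sample batches
exporter.export_to_zoo(loader, max_samples=2)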
diff --git a/sparseml/_modules/sparseml/pytorch/utils/helpers.html b/sparseml/_modules/sparseml/pytorch/utils/helpers.html
index 576b2686d66..6fcc166e901 100644
--- a/sparseml/_modules/sparseml/pytorch/utils/helpers.html
+++ b/sparseml/_modules/sparseml/pytorch/utils/helpers.html
@@ -533,7 +533,6 @@ Source code for sparseml.pytorch.utils.helpers
:param npz: True to export as an npz file, False otherwise
:return: the path of the numpy file the tensor was exported to
"""
-
if isinstance(tensor, Tensor):
tensor = tensor.detach().cpu().numpy()
elif isinstance(tensor, Dict):
@@ -541,7 +540,10 @@ Source code for sparseml.pytorch.utils.helpers
(key, val.detach().cpu().numpy()) for key, val in tensor.items()
)
elif isinstance(tensor, Iterable):
- tensor = [val.detach().cpu().numpy() for val in tensor]
+ tensor = [
+ val.detach().cpu().numpy() if isinstance(val, Tensor) else val
+ for val in tensor
+ ]
else:
        raise ValueError("Unrecognized type given for tensor {}".format(tensor))
diff --git a/sparseml/_modules/sparseml/pytorch/utils/loss.html b/sparseml/_modules/sparseml/pytorch/utils/loss.html
index d9289cdee99..a32d9e35cee 100644
--- a/sparseml/_modules/sparseml/pytorch/utils/loss.html
+++ b/sparseml/_modules/sparseml/pytorch/utils/loss.html
@@ -397,7 +397,7 @@ Source code for sparseml.pytorch.utils.loss
[docs]class InceptionCrossEntropyLossWrapper(LossWrapper):
"""
- Loss wrapper for training an inception model that as an aux output
+ Loss wrapper for training an inception model that has an aux output
with cross entropy.
Defines the loss in the following way:
diff --git a/sparseml/_modules/sparseml/pytorch/utils/model.html b/sparseml/_modules/sparseml/pytorch/utils/model.html
index 774b0c68747..d1c5bdd038d 100644
--- a/sparseml/_modules/sparseml/pytorch/utils/model.html
+++ b/sparseml/_modules/sparseml/pytorch/utils/model.html
@@ -194,7 +194,7 @@ Source code for sparseml.pytorch.utils.model
"""
from collections import OrderedDict
-from typing import List, Tuple, Union
+from typing import Any, List, Tuple, Union
import torch
from torch.nn import DataParallel, Module
@@ -218,6 +218,8 @@ Source code for sparseml.pytorch.utils.model
"load_optimizer",
"load_epoch",
"save_model",
+ "script_model",
+ "trace_model",
"model_to_device",
"parallelize_model",
"device_to_name_ids",
@@ -319,6 +321,38 @@ Source code for sparseml.pytorch.utils.model
return None
+[docs]def trace_model(
+ path: str,
+ model: Module,
+ sample_batch: Any,
+):
+ """
+ Convenience function which traces the provided module with the sample batch
+ into TorchScript and saves it to the provided path.
+
+ :param path: path to save torchscript
+ :param model: module to convert to TorchScript
+ :param sample_batch: sample batch to trace module with
+ """
+ script = torch.jit.trace_module(model, {"forward": sample_batch})
+ torch.jit.save(script, path)
+
+
+[docs]def script_model(
+ path: str,
+ model: Module,
+):
+ """
+ Convenience function which scripts the provided module into TorchScript
+ and saves it to the provided path.
+
+ :param path: path to save torchscript
+ :param model: module to convert to torchscript
+ """
+ script = torch.jit.script(model)
+ torch.jit.save(script, path)
+
+
[docs]def save_model(
path: str,
model: Module,
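For reference, a minimal sketch of the two new TorchScript helpers; the file names and toy module are illustrative:

import torch
from sparseml.pytorch.utils.model import script_model, trace_model

model = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU())
sample = torch.randn(1, 16)

script_model("model_scripted.pts", model)       # TorchScript via torch.jit.script
trace_model("model_traced.pts", model, sample)  # TorchScript via torch.jit.trace_module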
diff --git a/sparseml/_modules/sparseml/pytorch/optim/quantization/helpers.html b/sparseml/_modules/sparseml/pytorch/utils/quantization/helpers.html
similarity index 98%
rename from sparseml/_modules/sparseml/pytorch/optim/quantization/helpers.html
rename to sparseml/_modules/sparseml/pytorch/utils/quantization/helpers.html
index 9d2f07805b7..7493bca50cb 100644
--- a/sparseml/_modules/sparseml/pytorch/optim/quantization/helpers.html
+++ b/sparseml/_modules/sparseml/pytorch/utils/quantization/helpers.html
@@ -7,7 +7,7 @@
- sparseml.pytorch.optim.quantization.helpers — SparseML 0.1.0 documentation
+ sparseml.pytorch.utils.quantization.helpers — SparseML 0.1.0 documentation
@@ -159,7 +159,7 @@