diff --git a/deepsparse/_modules/deepsparse/benchmark.html b/deepsparse/_modules/deepsparse/benchmark.html
index fc246f0c691..9a8c0fa3d36 100644
--- a/deepsparse/_modules/deepsparse/benchmark.html
+++ b/deepsparse/_modules/deepsparse/benchmark.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_modules/deepsparse/cpu.html b/deepsparse/_modules/deepsparse/cpu.html
index beee9d15555..5b9c8b05eab 100644
--- a/deepsparse/_modules/deepsparse/cpu.html
+++ b/deepsparse/_modules/deepsparse/cpu.html
@@ -108,10 +108,11 @@
@@ -196,11 +197,12 @@
code related to detecting the details of the currently available cpu
"""
-from typing import Tuple
+import json
import os
-import sys
import subprocess
-import json
+import sys
+from typing import Tuple
+
__all__ = ["cpu_details", "VALID_VECTOR_EXTENSIONS"]
@@ -312,8 +314,9 @@ Source code for deepsparse.cpu
)
if avx_type_override not in VALID_VECTOR_EXTENSIONS:
raise OSError(
- "neuralmagic: invalid avx instruction set '{}' must be one of {}."
- .format(avx_type_override, ",".join(VALID_VECTOR_EXTENSIONS))
+ "neuralmagic: invalid avx instruction set '{}' must be one of {}.".format(
+ avx_type_override, ",".join(VALID_VECTOR_EXTENSIONS)
+ )
)
arch.override_isa(avx_type_override)
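As a point of reference for the override check above, a minimal sketch of inspecting what this module exposes; it relies only on the two names in `__all__` and prints whatever `cpu_details()` returns rather than assuming its exact structure:

```python
# minimal sketch; cpu_details and VALID_VECTOR_EXTENSIONS are the two public
# names exported by deepsparse.cpu (see __all__ above)
from deepsparse.cpu import VALID_VECTOR_EXTENSIONS, cpu_details

# the set of vector extensions an AVX override is validated against
print(VALID_VECTOR_EXTENSIONS)

# details of the CPU the engine will run on; printed as-is since the exact
# return structure is not shown in this diff
print(cpu_details())
```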
diff --git a/deepsparse/_modules/deepsparse/engine.html b/deepsparse/_modules/deepsparse/engine.html
index 6e4b3816c66..7066a3d2e13 100644
--- a/deepsparse/_modules/deepsparse/engine.html
+++ b/deepsparse/_modules/deepsparse/engine.html
@@ -108,10 +108,11 @@
@@ -201,15 +202,21 @@ Source code for deepsparse.engine
from typing import Dict, Iterable, List, Optional, Tuple, Union
import numpy
+from tqdm.auto import tqdm
from deepsparse.benchmark import BenchmarkResults
try:
+ from sparsezoo import Zoo
from sparsezoo.objects import File, Model
-except Exception:
+
+ sparsezoo_import_error = None
+except Exception as sparsezoo_err:
+ Zoo = None
Model = object
File = object
+ sparsezoo_import_error = sparsezoo_err
try:
# flake8: noqa
@@ -235,9 +242,13 @@ Source code for deepsparse.engine
if not model:
raise ValueError("model must be a path, sparsezoo.Model, or sparsezoo.File")
- if isinstance(model, str):
- pass
- elif Model is not object and isinstance(model, Model):
+ if isinstance(model, str) and model.startswith("zoo:"):
+ # load SparseZoo Model from stub
+ if sparsezoo_import_error is not None:
+ raise sparsezoo_import_error
+ model = Zoo.load_model_from_stub(model)
+
+ if Model is not object and isinstance(model, Model):
# default to the main onnx file for the model
model = model.onnx_file.downloaded_path()
elif File is not object and isinstance(model, File):
@@ -285,8 +296,9 @@ Source code for deepsparse.engine
| # create an engine for batch size 1 on all available cores
| engine = Engine("path/to/onnx", batch_size=1, num_cores=None)
- :param model: Either a path to the model's onnx file, a sparsezoo Model object,
- or a sparsezoo ONNX File object that defines the neural network
+ :param model: Either a path to the model's onnx file, a SparseZoo model stub
+ prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+ object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the engine
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
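To illustrate the new 'zoo:' stub path described above, a small sketch; the stub string and the compile_model usage are borrowed from the quick tour later in this diff, so treat it as an example rather than a canonical workflow:

```python
from deepsparse import compile_model

# SparseZoo stub reused from the quick tour in this diff; any valid "zoo:"
# stub is resolved through Zoo.load_model_from_stub before compilation
stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate"
engine = compile_model(model=stub, batch_size=1)
print(engine)
```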
@@ -504,6 +516,7 @@ Source code for deepsparse.engine
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
+ show_progress: bool = False,
) -> BenchmarkResults:
"""
A convenience function for quickly benchmarking the instantiated model
@@ -522,6 +535,7 @@ Source code for deepsparse.engine
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
+ :param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
# define data loader
@@ -535,6 +549,7 @@ Source code for deepsparse.engine
num_warmup_iterations=num_warmup_iterations,
include_inputs=include_inputs,
include_outputs=include_outputs,
+ show_progress=show_progress,
)
[docs] def benchmark_loader(
@@ -544,6 +559,7 @@ Source code for deepsparse.engine
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
+ show_progress: bool = False,
) -> BenchmarkResults:
"""
A convenience function for quickly benchmarking the instantiated model
@@ -562,6 +578,7 @@ Source code for deepsparse.engine
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
+ :param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
assert num_iterations >= 1 and num_warmup_iterations >= 0, (
@@ -571,13 +588,15 @@ Source code for deepsparse.engine
completed_iterations = 0
results = BenchmarkResults()
+ if show_progress:
+ progress_bar = tqdm(total=num_iterations)
+
while completed_iterations < num_warmup_iterations + num_iterations:
for batch in loader:
# run benchmark
start = time.time()
out = self.run(batch)
end = time.time()
- completed_iterations += 1
if completed_iterations >= num_warmup_iterations:
# update results if warmup iterations are completed
@@ -588,10 +607,17 @@ Source code for deepsparse.engine
inputs=batch if include_inputs else None,
outputs=out if include_outputs else None,
)
+ if show_progress:
+ progress_bar.update(1)
+
+ completed_iterations += 1
if completed_iterations >= num_warmup_iterations + num_iterations:
break
+ if show_progress:
+ progress_bar.close()
+
return results
def _validate_inputs(self, inp: List[numpy.ndarray]):
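For context, a hedged usage sketch of the new show_progress flag on an instantiated engine; the input shape and stub mirror the quick tour later in this diff:

```python
import numpy
from deepsparse import compile_model

batch_size = 64
# random input matching the ResNet-50 (3, 224, 224) shape used in the quick tour
sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]

engine = compile_model(
    model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none",
    batch_size=batch_size,
)
# a tqdm progress bar tracks the timed iterations while benchmarking runs
results = engine.benchmark(sample_inputs, num_iterations=20, show_progress=True)
print(results)
```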
@@ -633,8 +659,9 @@ Source code for deepsparse.engine
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).
- :param model: Either a path to the model's onnx file, a sparsezoo Model object,
- or a sparsezoo ONNX File object that defines the neural network
+ :param model: Either a path to the model's onnx file, a SparseZoo model stub
+ prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+ object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the model
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
@@ -653,6 +680,7 @@ Source code for deepsparse.engine
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
+ show_progress: bool = False,
) -> BenchmarkResults:
"""
Convenience function to benchmark a model in the DeepSparse Engine
@@ -660,8 +688,9 @@ Source code for deepsparse.engine
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).
- :param model: Either a path to the model's onnx file, a sparsezoo Model object,
- or a sparsezoo ONNX File object that defines the neural network
+ :param model: Either a path to the model's onnx file, a SparseZoo model stub
+ prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+ object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the model
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
@@ -678,12 +707,18 @@ Source code for deepsparse.engine
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
+ :param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
model = compile_model(model, batch_size, num_cores)
return model.benchmark(
- inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs
+ inp,
+ num_iterations,
+ num_warmup_iterations,
+ include_inputs,
+ include_outputs,
+ show_progress,
)
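A matching sketch for the module-level convenience function, using only the parameters visible in this diff; the stub again comes from the quick tour:

```python
import numpy
from deepsparse.engine import benchmark_model

batch_size = 1
sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]

# compile and benchmark in one call; show_progress is forwarded to
# Engine.benchmark as shown above
results = benchmark_model(
    "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate",
    sample_inputs,
    batch_size=batch_size,
    num_iterations=20,
    num_warmup_iterations=5,
    show_progress=True,
)
print(results)
```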
@@ -704,9 +739,9 @@ Source code for deepsparse.engine
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).
- :param model: Either a path to the model's onnx file, a sparsezoo Model object,
- or a sparsezoo ONNX File object that defines the neural network
- graph definition to analyze
+ :param model: Either a path to the model's onnx file, a SparseZoo model stub
+ prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+ object that defines the neural network graph definition to analyze
:param inp: The list of inputs to pass to the engine for analyzing inference.
The expected order is the inputs order as defined in the ONNX graph.
:param batch_size: The batch size of the inputs to be used with the model
diff --git a/deepsparse/_modules/deepsparse/lib.html b/deepsparse/_modules/deepsparse/lib.html
index 6c082b01381..e1a3a7fb790 100644
--- a/deepsparse/_modules/deepsparse/lib.html
+++ b/deepsparse/_modules/deepsparse/lib.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_modules/deepsparse/utils/data.html b/deepsparse/_modules/deepsparse/utils/data.html
index 5a39d014627..2181838b3e0 100644
--- a/deepsparse/_modules/deepsparse/utils/data.html
+++ b/deepsparse/_modules/deepsparse/utils/data.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_modules/deepsparse/utils/log.html b/deepsparse/_modules/deepsparse/utils/log.html
index 3f4a4f371fe..5feb120c84c 100644
--- a/deepsparse/_modules/deepsparse/utils/log.html
+++ b/deepsparse/_modules/deepsparse/utils/log.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_modules/deepsparse/utils/onnx.html b/deepsparse/_modules/deepsparse/utils/onnx.html
index 85d721efdef..4ebb253f4de 100644
--- a/deepsparse/_modules/deepsparse/utils/onnx.html
+++ b/deepsparse/_modules/deepsparse/utils/onnx.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_modules/index.html b/deepsparse/_modules/index.html
index 6baefd17348..cec3996758c 100644
--- a/deepsparse/_modules/index.html
+++ b/deepsparse/_modules/index.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/_sources/api/deepsparse.rst.txt b/deepsparse/_sources/api/deepsparse.rst.txt
index 66aefc5f97e..5dde3ab11c6 100644
--- a/deepsparse/_sources/api/deepsparse.rst.txt
+++ b/deepsparse/_sources/api/deepsparse.rst.txt
@@ -1,3 +1,18 @@
+..
+ Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
deepsparse package
==================
diff --git a/deepsparse/_sources/api/deepsparse.utils.rst.txt b/deepsparse/_sources/api/deepsparse.utils.rst.txt
index 107a47d20d9..73ddc7e5d57 100644
--- a/deepsparse/_sources/api/deepsparse.utils.rst.txt
+++ b/deepsparse/_sources/api/deepsparse.utils.rst.txt
@@ -1,3 +1,18 @@
+..
+ Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
deepsparse.utils package
========================
diff --git a/deepsparse/_sources/api/modules.rst.txt b/deepsparse/_sources/api/modules.rst.txt
index e20c5074fdf..1871f62e5ba 100644
--- a/deepsparse/_sources/api/modules.rst.txt
+++ b/deepsparse/_sources/api/modules.rst.txt
@@ -1,3 +1,18 @@
+..
+ Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
deepsparse
==========
diff --git a/deepsparse/_sources/index.rst.txt b/deepsparse/_sources/index.rst.txt
index 7e3638e94c5..0a2344b29b2 100644
--- a/deepsparse/_sources/index.rst.txt
+++ b/deepsparse/_sources/index.rst.txt
@@ -17,16 +17,17 @@
DeepSparse |version|
====================
-CPU inference engine that delivers unprecedented performance for sparse models.
+Neural network inference engine that delivers GPU-class performance for sparsified models on CPUs
.. raw:: html
@@ -50,54 +51,59 @@ CPU inference engine that delivers unprecedented performance for sparse models.
Overview
========
-The DeepSparse Engine is a CPU runtime that delivers unprecedented performance by taking advantage of
-natural sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads.
-It is focused on model deployment and scaling machine learning pipelines,
-fitting seamlessly into your existing deployments as an inference backend.
+The DeepSparse Engine is a CPU runtime that delivers GPU-class performance by taking advantage of sparsity within neural networks to reduce the compute required and to accelerate memory-bound workloads.
+It is focused on model deployment and scaling machine learning pipelines, fitting seamlessly into your existing deployments as an inference backend.
+
+`This repository `_ includes package APIs along with examples to quickly get started benchmarking and inferencing sparse models.
+
+Sparsification
+==============
+
+Sparsification is the process of taking a trained deep learning model and removing redundant information from the overprecise and over-parameterized network, resulting in a faster and smaller model.
+Techniques for sparsification are all-encompassing, including everything from inducing sparsity using `pruning `_ and `quantization `_ to enabling naturally occurring sparsity using `activation sparsity `_ or `winograd/FFT `_.
+When implemented correctly, these techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics.
+For example, pruning plus quantization can give over `7x improvements in performance `_ while recovering to nearly the same baseline accuracy.
+
+The Deep Sparse product suite builds on top of sparsification, enabling you to easily apply the techniques to your datasets and models using recipe-driven approaches.
+Recipes encode the directions for how to sparsify a model into a simple, easily editable format.
+- Download a sparsification recipe and sparsified model from the `SparseZoo `_.
+- Alternatively, create a recipe for your model using `Sparsify `_.
+- Apply your recipe with only a few lines of code using `SparseML `_.
+- Finally, for GPU-level performance on CPUs, deploy your sparse-quantized model with the `DeepSparse Engine `_.
-`This GitHub repository `_ includes package APIs along with examples to quickly get started learning about and
-actually running sparse models.
+
+**Full Deep Sparse product flow:**
+
+
Compatibility
=============
-The DeepSparse Engine ingests models in the `ONNX `_ format,
-allowing for compatibility with `PyTorch `_,
-`TensorFlow `_, `Keras `_,
-and `many other frameworks `_ that support it.
+The DeepSparse Engine ingests models in the `ONNX `_ format,
+allowing for compatibility with `PyTorch `_,
+`TensorFlow `_, `Keras `_,
+and `many other frameworks `_ that support it.
This reduces the extra work of preparing your trained model for inference to just one step of exporting.
-Related Products
-================
-
-- `SparseZoo `_:
- Neural network model repository for highly sparse models and optimization recipes
-- `SparseML `_:
- Libraries for state-of-the-art deep neural network optimization algorithms,
- enabling simple pipelines integration with a few lines of code
-- `Sparsify `_:
- Easy-to-use autoML interface to optimize deep neural networks for
- better inference performance and a smaller footprint
-
Resources and Learning More
===========================
-- `SparseZoo Documentation `_
-- `SparseML Documentation `_
-- `Sparsify Documentation `_
-- `Neural Magic Blog `_,
- `Resources `_,
- `Website `_
+- `SparseZoo Documentation `_
+- `SparseML Documentation `_
+- `Sparsify Documentation `_
+- `Neural Magic Blog `_,
+ `Resources `_,
+ `Website `_
Release History
===============
Official builds are hosted on PyPi
-- stable: `deepsparse `_
-- nightly (dev): `deepsparse-nightly `_
+- stable: `deepsparse `_
+- nightly (dev): `deepsparse-nightly `_
Additionally, more information can be found via
-`GitHub Releases `_.
+`GitHub Releases `_.
.. toctree::
:maxdepth: 3
@@ -120,8 +126,9 @@ Additionally, more information can be found via
api/deepsparse
.. toctree::
- :maxdepth: 2
- :caption: Help and Support
+ :maxdepth: 3
+ :caption: Help
Bugs, Feature Requests
Support, General Q&A
+ Neural Magic Docs
\ No newline at end of file
diff --git a/deepsparse/_sources/quicktour.md.txt b/deepsparse/_sources/quicktour.md.txt
index 432c267f70e..7bb3c94a1ac 100644
--- a/deepsparse/_sources/quicktour.md.txt
+++ b/deepsparse/_sources/quicktour.md.txt
@@ -16,24 +16,22 @@ limitations under the License.
## Quick Tour
-To expedite inference and benchmarking on real models, we include the `sparsezoo` package.
-[SparseZoo](https://github.com/neuralmagic/sparsezoo) hosts inference optimized models,
-trained on repeatable optimization recipes using state-of-the-art techniques from
-[SparseML](https://github.com/neuralmagic/sparseml).
+To expedite inference and benchmarking on real models, we include the `sparsezoo` package. [SparseZoo](https://github.com/neuralmagic/sparsezoo) hosts inference-optimized models, trained on repeatable sparsification recipes using state-of-the-art techniques from [SparseML](https://github.com/neuralmagic/sparseml).
### Quickstart with SparseZoo ONNX Models
-**MobileNetV1 Dense**
+**ResNet-50 Dense**
-Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense MobileNetV1 from SparseZoo.
+Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense ResNet-50 from SparseZoo.
```python
from deepsparse import compile_model
from sparsezoo.models import classification
+
batch_size = 64
# Download model and compile as optimized executable for your machine
-model = classification.mobilenet_v1()
+model = classification.resnet_50()
engine = compile_model(model, batch_size=batch_size)
# Fetch sample input and predict output using engine
@@ -41,46 +39,68 @@ inputs = model.data_inputs.sample_batch(batch_size=batch_size)
outputs, inference_time = engine.timed_run(inputs)
```
-**MobileNetV1 Optimized**
+**ResNet-50 Sparsified**
-When exploring available optimized models, you can use the `Zoo.search_optimized_models`
-utility to find models that share a base.
+When exploring available optimized models, you can use the `Zoo.search_optimized_models` utility to find models that share a base.
-Let us try this on the dense MobileNetV1 to see what is available.
+Try this on the dense ResNet-50 to see what is available:
```python
from sparsezoo import Zoo
from sparsezoo.models import classification
-print(Zoo.search_optimized_models(classification.mobilenet_v1()))
+
+model = classification.resnet_50()
+print(Zoo.search_optimized_models(model))
```
+
Output:
-```
-[Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/base-none),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-conservative),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-moderate),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned_quant-moderate)]
+
+```shell
+[
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-conservative),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-moderate),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet-augmented/pruned_quant-aggressive)
+]
```
-Great. We can see there are two pruned versions targeting FP32,
-`conservative` at 100% and `moderate` at >= 99% of baseline accuracy.
-There is also a `pruned_quant` variant targeting INT8.
+We can see there are two pruned versions targeting FP32 and two pruned, quantized versions targeting INT8.
+The `conservative`, `moderate`, and `aggressive` tags recover to 100%, >=99%, and <99% of baseline accuracy, respectively.
-Let's say you want to evaluate best performance on FP32 and are okay with a small drop in accuracy,
-so we can choose `pruned-moderate` over `pruned-conservative`.
+For a version of ResNet-50 that recovers close to the baseline and is very performant, choose the pruned_quant-moderate model.
+This model will run [nearly 7x faster](https://neuralmagic.com/blog/benchmark-resnet50-with-deepsparse) than the baseline model on a compatible CPU (with the VNNI instruction set enabled).
+For hardware compatibility, see the Hardware Support section.
```python
from deepsparse import compile_model
-from sparsezoo.models import classification
-batch_size = 64
-
-model = classification.mobilenet_v1(optim_name="pruned", optim_category="moderate")
-engine = compile_model(model, batch_size=batch_size)
+import numpy
-inputs = model.data_inputs.sample_batch(batch_size=batch_size)
-outputs, inference_time = engine.timed_run(inputs)
+batch_size = 64
+sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]
+
+# run baseline benchmarking
+engine_base = compile_model(
+ model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none",
+ batch_size=batch_size,
+)
+benchmarks_base = engine_base.benchmark(sample_inputs)
+print(benchmarks_base)
+
+# run sparse benchmarking
+engine_sparse = compile_model(
+ model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate",
+ batch_size=batch_size,
+)
+if not engine_sparse.cpu_vnni:
+ print("WARNING: VNNI instructions not detected, quantization speedup not well supported")
+benchmarks_sparse = engine_sparse.benchmark(sample_inputs)
+print(benchmarks_sparse)
+
+print(f"Speedup: {benchmarks_sparse.items_per_second / benchmarks_base.items_per_second:.2f}x")
```
-### Quickstart with custom ONNX models
+### Quickstart with Custom ONNX Models
We accept ONNX files for custom models, too. Simply plug in your model to compare performance with other solutions.
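As a sketch of that flow, assuming the mobilenetv2-7.onnx file downloaded in the quick tour's wget step and the generate_random_inputs helper from deepsparse.utils.onnx (its (path, batch_size) call here is an assumption based on the API docs in this diff):

```python
from deepsparse import compile_model
from deepsparse.utils.onnx import generate_random_inputs

# custom ONNX file, e.g. the mobilenetv2-7.onnx fetched in the quick tour;
# swap in a path to any ONNX model of your own
onnx_filepath = "mobilenetv2-7.onnx"
batch_size = 16

# random inputs matching the model's input shapes
# (assumed (path, batch_size) signature for this helper)
inputs = generate_random_inputs(onnx_filepath, batch_size)

engine = compile_model(onnx_filepath, batch_size=batch_size)
outputs, inference_time = engine.timed_run(inputs)
print(f"inference time: {inference_time}")
```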
diff --git a/deepsparse/api/deepsparse.html b/deepsparse/api/deepsparse.html
index ceb4a956006..7f2be874cd4 100644
--- a/deepsparse/api/deepsparse.html
+++ b/deepsparse/api/deepsparse.html
@@ -123,10 +123,11 @@
@@ -592,8 +593,9 @@ Submodules
Parameters
-model – Either a path to the model’s onnx file, a sparsezoo Model object,
-or a sparsezoo ONNX File object that defines the neural network
+model – Either a path to the model’s onnx file, a SparseZoo model stub
+prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File
+object that defines the neural network
batch_size – The batch size of the inputs to be used with the engine
num_cores – The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
@@ -614,7 +616,7 @@
Submodules
-benchmark(inp: List[numpy.ndarray], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
+benchmark(inp: List[numpy.ndarray], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
A convenience function for quickly benchmarking the instantiated model
on a given input in the DeepSparse Engine.
After executing, will return the summary statistics for benchmarking.
@@ -633,6 +635,7 @@ Submodules
Returns
@@ -643,7 +646,7 @@ Submodules
-benchmark_loader(loader: Iterable[List[numpy.ndarray]], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
+benchmark_loader(loader: Iterable[List[numpy.ndarray]], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
A convenience function for quickly benchmarking the instantiated model
on a given DataLoader in the DeepSparse Engine.
After executing, will return the summary statistics for benchmarking.
@@ -662,6 +665,7 @@ Submodules
Returns
@@ -848,9 +852,9 @@ Submodules
- Parameters
-model – Either a path to the model’s onnx file, a sparsezoo Model object,
-or a sparsezoo ONNX File object that defines the neural network
-graph definition to analyze
+model – Either a path to the model’s onnx file, a SparseZoo model stub
+prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File
+object that defines the neural network graph definition to analyze
inp – The list of inputs to pass to the engine for analyzing inference.
The expected order is the inputs order as defined in the ONNX graph.
batch_size – The batch size of the inputs to be used with the model
@@ -881,7 +885,7 @@ Submodules
-deepsparse.engine.benchmark_model(model: Union[str, sparsezoo.objects.model.Model, sparsezoo.objects.file.File], inp: List[numpy.ndarray], batch_size: int = 1, num_cores: Optional[int] = None, num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
+deepsparse.engine.benchmark_model(model: Union[str, sparsezoo.objects.model.Model, sparsezoo.objects.file.File], inp: List[numpy.ndarray], batch_size: int = 1, num_cores: Optional[int] = None, num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False) → deepsparse.benchmark.BenchmarkResults[source]¶
Convenience function to benchmark a model in the DeepSparse Engine
from an ONNX file for inference.
Gives defaults of batch_size == 1 and num_cores == None
@@ -889,8 +893,9 @@
Submodules
- Parameters
-model – Either a path to the model’s onnx file, a sparsezoo Model object,
-or a sparsezoo ONNX File object that defines the neural network
+model – Either a path to the model’s onnx file, a SparseZoo model stub
+prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File
+object that defines the neural network
batch_size – The batch size of the inputs to be used with the model
num_cores – The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
@@ -907,6 +912,7 @@ Submodules
Returns
@@ -925,8 +931,9 @@ Submodules
- Parameters
-model – Either a path to the model’s onnx file, a sparsezoo Model object,
-or a sparsezoo ONNX File object that defines the neural network
+model – Either a path to the model’s onnx file, a SparseZoo model stub
+prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File
+object that defines the neural network
batch_size – The batch size of the inputs to be used with the model
num_cores – The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
diff --git a/deepsparse/api/deepsparse.utils.html b/deepsparse/api/deepsparse.utils.html
index 22bdd6f28be..9bc63bfc8cd 100644
--- a/deepsparse/api/deepsparse.utils.html
+++ b/deepsparse/api/deepsparse.utils.html
@@ -129,10 +129,11 @@
diff --git a/deepsparse/api/modules.html b/deepsparse/api/modules.html
index fde843cca53..2df365f5890 100644
--- a/deepsparse/api/modules.html
+++ b/deepsparse/api/modules.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/genindex.html b/deepsparse/genindex.html
index 3845ba31a31..01453f7f562 100644
--- a/deepsparse/genindex.html
+++ b/deepsparse/genindex.html
@@ -108,10 +108,11 @@
diff --git a/deepsparse/index.html b/deepsparse/index.html
index 3b72f96f8d8..6ae56065d9e 100644
--- a/deepsparse/index.html
+++ b/deepsparse/index.html
@@ -109,10 +109,11 @@
@@ -182,13 +183,14 @@
DeepSparse 0.1¶
-CPU inference engine that delivers unprecedented performance for sparse models.
+Neural network inference engine that delivers GPU-class performance for sparsified models on CPUs
@@ -209,58 +211,57 @@ DeepSparse 0.1
Overview¶
-The DeepSparse Engine is a CPU runtime that delivers unprecedented performance by taking advantage of
-natural sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads.
-It is focused on model deployment and scaling machine learning pipelines,
-fitting seamlessly into your existing deployments as an inference backend.
-This GitHub repository includes package APIs along with examples to quickly get started learning about and
-actually running sparse models.
+The DeepSparse Engine is a CPU runtime that delivers GPU-class performance by taking advantage of sparsity within neural networks to reduce the compute required and to accelerate memory-bound workloads.
+It is focused on model deployment and scaling machine learning pipelines, fitting seamlessly into your existing deployments as an inference backend.
+This repository includes package APIs along with examples to quickly get started benchmarking and inferencing sparse models.
+
+
+Sparsification¶
+Sparsification is the process of taking a trained deep learning model and removing redundant information from the overprecise and over-parameterized network, resulting in a faster and smaller model.
+Techniques for sparsification are all-encompassing, including everything from inducing sparsity using pruning and quantization to enabling naturally occurring sparsity using activation sparsity or winograd/FFT.
+When implemented correctly, these techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics.
+For example, pruning plus quantization can give over 7x improvements in performance while recovering to nearly the same baseline accuracy.
+The Deep Sparse product suite builds on top of sparsification, enabling you to easily apply the techniques to your datasets and models using recipe-driven approaches.
+Recipes encode the directions for how to sparsify a model into a simple, easily editable format.
+- Download a sparsification recipe and sparsified model from the SparseZoo.
+- Alternatively, create a recipe for your model using Sparsify.
+- Apply your recipe with only a few lines of code using SparseML.
+- Finally, for GPU-level performance on CPUs, deploy your sparse-quantized model with the DeepSparse Engine.
+Full Deep Sparse product flow:
+<img src=”https://docs.neuralmagic.com/docs/source/sparsification/flow-overview.svg” width=”960px”>
Compatibility¶
-The DeepSparse Engine ingests models in the ONNX format,
-allowing for compatibility with PyTorch,
-TensorFlow, Keras,
-and many other frameworks that support it.
+
The DeepSparse Engine ingests models in the ONNX format,
+allowing for compatibility with PyTorch,
+TensorFlow, Keras,
+and many other frameworks that support it.
This reduces the extra work of preparing your trained model for inference to just one step of exporting.
-
Resources and Learning More¶
Release History¶
Official builds are hosted on PyPi
-- stable: deepsparse
-- nightly (dev): deepsparse-nightly
+- stable: deepsparse
+- nightly (dev): deepsparse-nightly
Additionally, more information can be found via
-GitHub Releases.
+GitHub Releases.
diff --git a/deepsparse/py-modindex.html b/deepsparse/py-modindex.html
index f1b91395df2..2d5371b8424 100644
--- a/deepsparse/py-modindex.html
+++ b/deepsparse/py-modindex.html
@@ -111,10 +111,11 @@
diff --git a/deepsparse/quicktour.html b/deepsparse/quicktour.html
index 21c2452ec90..df0091d8ae5 100644
--- a/deepsparse/quicktour.html
+++ b/deepsparse/quicktour.html
@@ -100,7 +100,7 @@
- Quick Tour
- Installation
@@ -114,10 +114,11 @@
@@ -201,20 +202,18 @@
limitations under the License.
-->
Quick Tour¶
-To expedite inference and benchmarking on real models, we include the sparsezoo package.
-SparseZoo hosts inference optimized models,
-trained on repeatable optimization recipes using state-of-the-art techniques from
-SparseML.
+To expedite inference and benchmarking on real models, we include the sparsezoo package. SparseZoo hosts inference-optimized models, trained on repeatable sparsification recipes using state-of-the-art techniques from SparseML.
Quickstart with SparseZoo ONNX Models¶
-MobileNetV1 Dense
-Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense MobileNetV1 from SparseZoo.
+ResNet-50 Dense
+Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense ResNet-50 from SparseZoo.
from deepsparse import compile_model
from sparsezoo.models import classification
+
batch_size = 64
# Download model and compile as optimized executable for your machine
-model = classification.mobilenet_v1()
+model = classification.resnet_50()
engine = compile_model(model, batch_size=batch_size)
# Fetch sample input and predict output using engine
@@ -222,41 +221,61 @@ Quickstart with SparseZoo ONNX Models
outputs, inference_time = engine.timed_run(inputs)
-MobileNetV1 Optimized
-When exploring available optimized models, you can use the Zoo.search_optimized_models
-utility to find models that share a base.
-Let us try this on the dense MobileNetV1 to see what is available.
+ResNet-50 Sparsified
+When exploring available optimized models, you can use the Zoo.search_optimized_models utility to find models that share a base.
+Try this on the dense ResNet-50 to see what is available:
from sparsezoo import Zoo
from sparsezoo.models import classification
-print(Zoo.search_optimized_models(classification.mobilenet_v1()))
+
+model = classification.resnet_50()
+print(Zoo.search_optimized_models(model))
Output:
-[Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/base-none),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-conservative),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-moderate),
- Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned_quant-moderate)]
+[
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-conservative),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-moderate),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate),
+ Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet-augmented/pruned_quant-aggressive)
+]
-Great. We can see there are two pruned versions targeting FP32, conservative at 100% and moderate at >= 99% of baseline accuracy.
-There is also a pruned_quant variant targeting INT8.
-Let’s say you want to evaluate best performance on FP32 and are okay with a small drop in accuracy, so we can choose pruned-moderate over pruned-conservative.
+We can see there are two pruned versions targeting FP32 and two pruned, quantized versions targeting INT8.
+The conservative, moderate, and aggressive tags recover to 100%, >=99%, and <99% of baseline accuracy, respectively.
+For a version of ResNet-50 that recovers close to the baseline and is very performant, choose the pruned_quant-moderate model.
+This model will run nearly 7x faster than the baseline model on a compatible CPU (with the VNNI instruction set enabled).
+For hardware compatibility, see the Hardware Support section.
from deepsparse import compile_model
-from sparsezoo.models import classification
-batch_size = 64
+import numpy
-model = classification.mobilenet_v1(optim_name="pruned", optim_category="moderate")
-engine = compile_model(model, batch_size=batch_size)
-
-inputs = model.data_inputs.sample_batch(batch_size=batch_size)
-outputs, inference_time = engine.timed_run(inputs)
+batch_size = 64
+sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]
+
+# run baseline benchmarking
+engine_base = compile_model(
+ model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none",
+ batch_size=batch_size,
+)
+benchmarks_base = engine_base.benchmark(sample_inputs)
+print(benchmarks_base)
+
+# run sparse benchmarking
+engine_sparse = compile_model(
+ model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate",
+ batch_size=batch_size,
+)
+if not engine_sparse.cpu_vnni:
+ print("WARNING: VNNI instructions not detected, quantization speedup not well supported")
+benchmarks_sparse = engine_sparse.benchmark(sample_inputs)
+print(benchmarks_sparse)
+
+print(f"Speedup: {benchmarks_sparse.items_per_second / benchmarks_base.items_per_second:.2f}x")
-Quickstart with custom ONNX models¶
+Quickstart with Custom ONNX Models¶
We accept ONNX files for custom models, too. Simply plug in your model to compare performance with other solutions.
> wget https://github.com/onnx/models/raw/master/vision/classification/mobilenet/model/mobilenetv2-7.onnx
Saving to: ‘mobilenetv2-7.onnx’
diff --git a/deepsparse/search.html b/deepsparse/search.html
index 616e245c014..fc89856429a 100644
--- a/deepsparse/search.html
+++ b/deepsparse/search.html
@@ -111,10 +111,11 @@
diff --git a/deepsparse/searchindex.js b/deepsparse/searchindex.js
index 3936c2ff966..86fe5c57b7f 100644
--- a/deepsparse/searchindex.js
+++ b/deepsparse/searchindex.js
@@ -1 +1 @@
,physic:[0,6],pin:[3,5,8],pinpoint:3,pip:9,pipelin:[3,8],planner:4,pleas:0,plu:8,plug:10,point:3,polici:3,pool:3,portion:[3,4],possibl:3,practic:3,pre:10,predict:10,prefer:6,prefix:0,prepar:8,prevent:[3,6],print:[3,4,10],privaci:3,process:[3,6,8],produc:[5,8],product:8,progress:0,properti:0,provid:[1,3,4],prunabl:0,prune:[0,8,10],pruned_qu:10,pypi:8,python:[0,3,9],pytorch:[8,10],quant:10,quantiz:[0,7,8,10],quick:8,quickli:[0,8,10],quickstart:8,rais:0,rand:0,randn:10,random:[0,1,4,10],rang:6,ratio:4,raw:10,real:[3,10],recip:[8,10],recommend:[0,3,7,9],record:0,recov:[8,10],reduc:8,redund:8,refer:1,regular:0,rel:1,relat:0,relu:[0,3,4],remov:8,repeat:[0,10],repo:4,report:3,repositori:[8,9],request:8,requir:[4,6,8,9],reshap:[3,4],resnet:10,resnet_50:10,resnet_v1:10,resolut:[3,4],resourc:[3,5],respect:10,restrict:6,result:[0,3,6,8],reveal:3,review:3,rewrit:1,rtol:1,run:[0,3,4,6,7,10],run_model:3,runtim:[4,5,8],sai:[],same:[6,8],sampl:[3,10],sample_batch:10,sample_input:10,save:[1,10],scale:8,scarc:3,scienc:3,script:3,seamlessli:8,search_optimized_model:10,second:[0,6],section:10,see:[0,3,6,10],seek:3,select:6,self:3,separ:6,serv:3,server:6,set:[0,3,6,10],setup:0,sever:3,shape:[1,3,4],share:[3,6,10],shell:3,ship:3,should:6,show:[3,6],show_progress:0,shuffl:0,signific:0,significantli:8,similar:1,similarli:6,simpl:8,simpli:10,simplif:3,sinc:6,singl:[0,6],size:[0,1,4,5],skylak:7,small:[],smaller:8,smt:6,socket:[0,6],softwar:3,solut:10,some:[0,3,7],sourc:[0,1,8],spars:[7,8,10],sparseml:[8,10],sparsezoo:[0,8],sparsif:10,sparsifi:[8,10],sparsiti:[0,7,8],specif:[0,6,7],specifi:[3,6],speedup:[0,10],split:3,src:[3,4,8],stabl:8,standard:[0,6],start:[0,3,4,6,8],startup:6,state:[0,3,10],statement:3,statist:[0,5],stderr:3,steadi:0,step:8,store:0,str:[0,1],straightforward:6,stride:[3,4],string:1,structur:0,stub:[0,10],subgraph:[4,5],submodul:[2,8],subpackag:[2,8],suffici:1,suit:8,summari:0,super_resolut:4,support:[0,4,5,8,10],supported_subgraph:[3,4],svg:8,system:[0,3,4,6,7,9],tabl:7,tag:10,take:[0,8],target:10,tbb:3,technic:3,techniqu:[8,10],tensor:[0,1,3],tensorflow:8,test:[4,9],test_1:4,than:[3,4,6,10],thei:[1,3],them:3,thi:[0,3,6,8,9,10],thread:[3,5,8],through:[0,6],tiger:7,time:[0,5],time_elaps:0,time_end:0,time_start:0,timed_run:[0,10],toler:1,too:10,took:0,top:8,torch:[3,4],total:4,tour:8,tradit:3,train:[3,8,10],translat:[3,4],transpos:[3,4],troubleshoot:3,truncat:3,tune:[5,8],tupl:0,two:[1,6,10],txt:3,type:[0,1,3],union:0,uniqu:3,unit:4,unlik:3,unoptim:3,unpreced:[],unsupport:[3,4],use:[0,3,6,10],used:[0,6],using:[0,3,4,6,8,9,10],usual:[1,6],util:[0,2,3,4,5,8,10],val_inp:0,valid:[0,7],validate_minimum_supported_fract:[3,4],valu:[3,4],variabl:[0,3,6],variant:[],variou:3,vector:0,verbos:[3,5,8],veri:[3,10],verify_output:1,version:[2,4,8,10],via:8,view:5,virtual:9,vision:10,vnni:[0,7,10],wai:6,wand:4,want:3,warm:0,warn:[3,4,10],websit:8,weight:[0,3,4],well:[8,10],were:0,wget:10,what:[0,3,10],when:[0,6,8,10],where:[3,4,6],whether:3,which:[3,6],whole:5,width:[0,8],winograd:8,within:[3,8],work:[6,7,8],workload:8,would:[3,6,9],x86:7,yet:0,you:[0,3,6,8,9,10],your:[3,8,9,10],zen:7,zoo:[0,10]},titles:["deepsparse package","deepsparse.utils package","deepsparse","Logging Guidance for Diagnostics and Debugging","Example Log, Verbose Level = diagnose","Debugging and Optimizing","Using the numactl Utility to Control Resource Utilization with the DeepSparse Engine","Hardware Support","DeepSparse 0.1","Installation","Quick 
Tour"],titleterms:{Using:6,addit:6,amount:3,batch:3,benchmark:0,compat:8,compil:3,content:[0,1],control:[3,6],core:3,cpu:0,custom:10,data:1,debug:[3,5],deepspars:[0,1,2,3,6,8],determin:3,diagnos:4,diagnost:3,each:3,enabl:3,engin:[0,3,6],exampl:[3,4],execut:3,find:3,graph:3,guidanc:3,hardwar:7,histori:8,instal:9,learn:8,level:4,lib:0,log:[1,3,4],model:10,modul:[0,1],more:8,node:3,note:6,numactl:6,number:3,obtain:3,onnx:[1,10],optim:[3,5],our:3,overview:8,packag:[0,1],pars:3,perform:3,pin:6,produc:3,product:[],quick:10,quickstart:10,relat:[],releas:8,resourc:[6,8],runtim:3,size:3,sparsezoo:10,sparsif:8,statist:3,subgraph:3,submodul:[0,1],subpackag:0,support:[3,7],thread:6,time:3,tour:10,tune:3,util:[1,6],verbos:4,version:0,view:3,whole:3}})
\ No newline at end of file
+