diff --git a/deepsparse/_modules/deepsparse/benchmark.html b/deepsparse/_modules/deepsparse/benchmark.html
index fc246f0c691..9a8c0fa3d36 100644
--- a/deepsparse/_modules/deepsparse/benchmark.html
+++ b/deepsparse/_modules/deepsparse/benchmark.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

diff --git a/deepsparse/_modules/deepsparse/cpu.html b/deepsparse/_modules/deepsparse/cpu.html
index beee9d15555..5b9c8b05eab 100644
--- a/deepsparse/_modules/deepsparse/cpu.html
+++ b/deepsparse/_modules/deepsparse/cpu.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

@@ -196,11 +197,12 @@

Source code for deepsparse.cpu

 code related to detecting the details of the currently available cpu
 """
 
-from typing import Tuple
+import json
 import os
-import sys
 import subprocess
-import json
+import sys
+from typing import Tuple
+
 
 __all__ = ["cpu_details", "VALID_VECTOR_EXTENSIONS"]
 
@@ -312,8 +314,9 @@ 

Source code for deepsparse.cpu

         )
         if avx_type_override not in VALID_VECTOR_EXTENSIONS:
             raise OSError(
-                "neuralmagic: invalid avx instruction set '{}' must be one of {}."
-                .format(avx_type_override, ",".join(VALID_VECTOR_EXTENSIONS))
+                "neuralmagic: invalid avx instruction set '{}' must be one of {}.".format(
+                    avx_type_override, ",".join(VALID_VECTOR_EXTENSIONS)
+                )
             )
         arch.override_isa(avx_type_override)
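The override check above validates a requested ISA against `VALID_VECTOR_EXTENSIONS`; when no override is in play, the detected values are what the engine reports. A minimal, hedged sketch of inspecting that detection through the module's exported `cpu_details()` helper; the exact structure of the returned details follows this module, not the sketch:

```python
from deepsparse.cpu import cpu_details

# cpu_details() probes the local machine; per this module it backs the
# engine's reported core count, AVX instruction set, and VNNI support.
details = cpu_details()
print(details)
```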
 
diff --git a/deepsparse/_modules/deepsparse/engine.html b/deepsparse/_modules/deepsparse/engine.html
index 6e4b3816c66..7066a3d2e13 100644
--- a/deepsparse/_modules/deepsparse/engine.html
+++ b/deepsparse/_modules/deepsparse/engine.html
@@ -108,10 +108,11 @@
 
-

Help and Support

+

Help

@@ -201,15 +202,21 @@

Source code for deepsparse.engine

 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy
+from tqdm.auto import tqdm
 
 from deepsparse.benchmark import BenchmarkResults
 
 
 try:
+    from sparsezoo import Zoo
     from sparsezoo.objects import File, Model
-except Exception:
+
+    sparsezoo_import_error = None
+except Exception as sparsezoo_err:
+    Zoo = None
     Model = object
     File = object
+    sparsezoo_import_error = sparsezoo_err
 
 try:
     # flake8: noqa
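The revised try/except above stashes the sparsezoo import failure instead of silently swallowing it, so the error resurfaces only when a 'zoo:' stub is actually used. A standalone sketch of the same idiom with a hypothetical optional dependency (`some_optional_dep` is illustrative, not a real package):

```python
try:
    import some_optional_dep  # hypothetical optional dependency

    optional_import_error = None
except Exception as err:
    some_optional_dep = None
    optional_import_error = err


def feature_needing_dep():
    # Surface the original import failure only when the optional feature
    # is exercised, so plain imports of this module never fail.
    if optional_import_error is not None:
        raise optional_import_error
    return some_optional_dep.do_work()
```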
@@ -235,9 +242,13 @@ 

Source code for deepsparse.engine

     if not model:
         raise ValueError("model must be a path, sparsezoo.Model, or sparsezoo.File")
 
-    if isinstance(model, str):
-        pass
-    elif Model is not object and isinstance(model, Model):
+    if isinstance(model, str) and model.startswith("zoo:"):
+        # load SparseZoo Model from stub
+        if sparsezoo_import_error is not None:
+            raise sparsezoo_import_error
+        model = Zoo.load_model_from_stub(model)
+
+    if Model is not object and isinstance(model, Model):
         # default to the main onnx file for the model
         model = model.onnx_file.downloaded_path()
     elif File is not object and isinstance(model, File):
@@ -285,8 +296,9 @@ 

Source code for deepsparse.engine

     |    # create an engine for batch size 1 on all available cores
     |    engine = Engine("path/to/onnx", batch_size=1, num_cores=None)
 
-    :param model: Either a path to the model's onnx file, a sparsezoo Model object,
-        or a sparsezoo ONNX File object that defines the neural network
+    :param model: Either a path to the model's onnx file, a SparseZoo model stub
+        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+        object that defines the neural network
     :param batch_size: The batch size of the inputs to be used with the engine
     :param num_cores: The number of physical cores to run the model on.
         Pass None or 0 to run on the max number of cores
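Given the stub handling added above, the model argument can now be a SparseZoo stub string as well as a local ONNX path. A minimal sketch, reusing the ResNet-50 stub that appears in the quick tour below (network access is assumed for the download):

```python
from deepsparse import compile_model

# SparseZoo stub taken from the quick tour; any valid "zoo:" stub works.
stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate"

# The stub is resolved via sparsezoo, the ONNX file is downloaded,
# and the engine is compiled for the local CPU at batch size 1.
engine = compile_model(stub, batch_size=1)
print(engine)
```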
@@ -504,6 +516,7 @@ 

Source code for deepsparse.engine

         num_warmup_iterations: int = 5,
         include_inputs: bool = False,
         include_outputs: bool = False,
+        show_progress: bool = False,
     ) -> BenchmarkResults:
         """
         A convenience function for quickly benchmarking the instantiated model
@@ -522,6 +535,7 @@ 

Source code for deepsparse.engine

             will be added to the results. Default is False
         :param include_outputs: If True, outputs from forward passes during benchmarking
             will be added to the results. Default is False
+        :param show_progress: If True, a progress bar will be displayed. Default is False
         :return: the results of benchmarking
         """
         # define data loader
@@ -535,6 +549,7 @@ 

Source code for deepsparse.engine

             num_warmup_iterations=num_warmup_iterations,
             include_inputs=include_inputs,
             include_outputs=include_outputs,
+            show_progress=show_progress,
         )
[docs] def benchmark_loader(
@@ -544,6 +559,7 @@

Source code for deepsparse.engine

         num_warmup_iterations: int = 5,
         include_inputs: bool = False,
         include_outputs: bool = False,
+        show_progress: bool = False,
     ) -> BenchmarkResults:
         """
         A convenience function for quickly benchmarking the instantiated model
@@ -562,6 +578,7 @@ 

Source code for deepsparse.engine

             will be added to the results. Default is False
         :param include_outputs: If True, outputs from forward passes during benchmarking
             will be added to the results. Default is False
+        :param show_progress: If True, a progress bar will be displayed. Default is False
         :return: the results of benchmarking
         """
         assert num_iterations >= 1 and num_warmup_iterations >= 0, (
@@ -571,13 +588,15 @@ 

Source code for deepsparse.engine

         completed_iterations = 0
         results = BenchmarkResults()
 
+        if show_progress:
+            progress_bar = tqdm(total=num_iterations)
+
         while completed_iterations < num_warmup_iterations + num_iterations:
             for batch in loader:
                 # run benchmark
                 start = time.time()
                 out = self.run(batch)
                 end = time.time()
-                completed_iterations += 1
 
                 if completed_iterations >= num_warmup_iterations:
                     # update results if warmup iterations are completed
@@ -588,10 +607,17 @@ 

Source code for deepsparse.engine

                         inputs=batch if include_inputs else None,
                         outputs=out if include_outputs else None,
                     )
+                    if show_progress:
+                        progress_bar.update(1)
+
+                completed_iterations += 1
 
                 if completed_iterations >= num_warmup_iterations + num_iterations:
                     break
 
+        if show_progress:
+            progress_bar.close()
+
         return results
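With the tqdm wiring above, callers opt into the progress bar through the new `show_progress` flag. A short usage sketch against a compiled engine; the ONNX path is hypothetical and the input shape is illustrative, so match both to whatever model is actually loaded:

```python
import numpy
from deepsparse import compile_model

batch_size = 1
# Illustrative input shape; match it to the model being compiled.
inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]

engine = compile_model("path/to/model.onnx", batch_size=batch_size)

# Displays a tqdm progress bar over the measured iterations.
results = engine.benchmark(inputs, num_iterations=100, show_progress=True)
print(results)
```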
def _validate_inputs(self, inp: List[numpy.ndarray]):
@@ -633,8 +659,9 @@

Source code for deepsparse.engine

     Gives defaults of batch_size == 1 and num_cores == None
     (will use all physical cores available on a single socket).
 
-    :param model: Either a path to the model's onnx file, a sparsezoo Model object,
-        or a sparsezoo ONNX File object that defines the neural network
+    :param model: Either a path to the model's onnx file, a SparseZoo model stub
+        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+        object that defines the neural network
     :param batch_size: The batch size of the inputs to be used with the model
     :param num_cores: The number of physical cores to run the model on.
         Pass None or 0 to run on the max number of cores
@@ -653,6 +680,7 @@ 

Source code for deepsparse.engine

     num_warmup_iterations: int = 5,
     include_inputs: bool = False,
     include_outputs: bool = False,
+    show_progress: bool = False,
 ) -> BenchmarkResults:
     """
     Convenience function to benchmark a model in the DeepSparse Engine
@@ -660,8 +688,9 @@ 

Source code for deepsparse.engine

     Gives defaults of batch_size == 1 and num_cores == None
     (will use all physical cores available on a single socket).
 
-    :param model: Either a path to the model's onnx file, a sparsezoo Model object,
-        or a sparsezoo ONNX File object that defines the neural network
+    :param model: Either a path to the model's onnx file, a SparseZoo model stub
+        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+        object that defines the neural network
     :param batch_size: The batch size of the inputs to be used with the model
     :param num_cores: The number of physical cores to run the model on.
         Pass None or 0 to run on the max number of cores
@@ -678,12 +707,18 @@ 

Source code for deepsparse.engine

         will be added to the results. Default is False
     :param include_outputs: If True, outputs from forward passes during benchmarking
         will be added to the results. Default is False
+    :param show_progress: If True, a progress bar will be displayed. Default is False
     :return: the results of benchmarking
     """
     model = compile_model(model, batch_size, num_cores)
 
     return model.benchmark(
-        inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs
+        inp,
+        num_iterations,
+        num_warmup_iterations,
+        include_inputs,
+        include_outputs,
+        show_progress,
     )
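The module-level convenience wrapper now forwards the same flag, so a one-call benchmark can combine it with the new stub support. A hedged sketch; the stub is the one used in the quick tour and the input shape is assumed for that model:

```python
import numpy
from deepsparse.engine import benchmark_model

batch_size = 64
sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]

# Compiles the stubbed model and benchmarks it in one call,
# with a progress bar over the timed iterations.
results = benchmark_model(
    "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate",
    sample_inputs,
    batch_size=batch_size,
    show_progress=True,
)
print(results)
```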
@@ -704,9 +739,9 @@

Source code for deepsparse.engine

     Gives defaults of batch_size == 1 and num_cores == None
     (will use all physical cores available on a single socket).
 
-    :param model: Either a path to the model's onnx file, a sparsezoo Model object,
-        or a sparsezoo ONNX File object that defines the neural network
-        graph definition to analyze
+    :param model: Either a path to the model's onnx file, a SparseZoo model stub
+        prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
+        object that defines the neural network graph definition to analyze
     :param inp: The list of inputs to pass to the engine for analyzing inference.
         The expected order is the inputs order as defined in the ONNX graph.
     :param batch_size: The batch size of the inputs to be used with the model
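analyze_model accepts the same forms of model argument as the functions above. A minimal sketch with a hypothetical local ONNX file and an assumed input shape:

```python
import numpy
from deepsparse.engine import analyze_model

# Hypothetical ONNX path; a "zoo:" stub or SparseZoo object also works
# per the updated docstring above.
onnx_path = "path/to/model.onnx"
batch_size = 1
inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]

# Runs the model and returns per-layer analysis of the inference
# (see the API reference for the exact structure of the result).
analysis = analyze_model(onnx_path, inputs, batch_size=batch_size)
print(analysis)
```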
diff --git a/deepsparse/_modules/deepsparse/lib.html b/deepsparse/_modules/deepsparse/lib.html
index 6c082b01381..e1a3a7fb790 100644
--- a/deepsparse/_modules/deepsparse/lib.html
+++ b/deepsparse/_modules/deepsparse/lib.html
@@ -108,10 +108,11 @@
 
-

Help and Support

+

Help

diff --git a/deepsparse/_modules/deepsparse/utils/data.html b/deepsparse/_modules/deepsparse/utils/data.html
index 5a39d014627..2181838b3e0 100644
--- a/deepsparse/_modules/deepsparse/utils/data.html
+++ b/deepsparse/_modules/deepsparse/utils/data.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

diff --git a/deepsparse/_modules/deepsparse/utils/log.html b/deepsparse/_modules/deepsparse/utils/log.html
index 3f4a4f371fe..5feb120c84c 100644
--- a/deepsparse/_modules/deepsparse/utils/log.html
+++ b/deepsparse/_modules/deepsparse/utils/log.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

diff --git a/deepsparse/_modules/deepsparse/utils/onnx.html b/deepsparse/_modules/deepsparse/utils/onnx.html
index 85d721efdef..4ebb253f4de 100644
--- a/deepsparse/_modules/deepsparse/utils/onnx.html
+++ b/deepsparse/_modules/deepsparse/utils/onnx.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

diff --git a/deepsparse/_modules/index.html b/deepsparse/_modules/index.html
index 6baefd17348..cec3996758c 100644
--- a/deepsparse/_modules/index.html
+++ b/deepsparse/_modules/index.html
@@ -108,10 +108,11 @@
-

Help and Support

+

Help

diff --git a/deepsparse/_sources/api/deepsparse.rst.txt b/deepsparse/_sources/api/deepsparse.rst.txt index 66aefc5f97e..5dde3ab11c6 100644 --- a/deepsparse/_sources/api/deepsparse.rst.txt +++ b/deepsparse/_sources/api/deepsparse.rst.txt @@ -1,3 +1,18 @@ +.. + Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + deepsparse package ================== diff --git a/deepsparse/_sources/api/deepsparse.utils.rst.txt b/deepsparse/_sources/api/deepsparse.utils.rst.txt index 107a47d20d9..73ddc7e5d57 100644 --- a/deepsparse/_sources/api/deepsparse.utils.rst.txt +++ b/deepsparse/_sources/api/deepsparse.utils.rst.txt @@ -1,3 +1,18 @@ +.. + Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + deepsparse.utils package ======================== diff --git a/deepsparse/_sources/api/modules.rst.txt b/deepsparse/_sources/api/modules.rst.txt index e20c5074fdf..1871f62e5ba 100644 --- a/deepsparse/_sources/api/modules.rst.txt +++ b/deepsparse/_sources/api/modules.rst.txt @@ -1,3 +1,18 @@ +.. + Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + deepsparse ========== diff --git a/deepsparse/_sources/index.rst.txt b/deepsparse/_sources/index.rst.txt index 7e3638e94c5..0a2344b29b2 100644 --- a/deepsparse/_sources/index.rst.txt +++ b/deepsparse/_sources/index.rst.txt @@ -17,16 +17,17 @@ DeepSparse |version| ==================== -CPU inference engine that delivers unprecedented performance for sparse models. +Neural network inference engine that delivers GPU-class performance for sparsified models on CPUs .. raw:: html
- GitHub - - - GitHub - + + GitHub + + + GitHub + Documentation @@ -50,54 +51,59 @@ CPU inference engine that delivers unprecedented performance for sparse models. Overview ======== -The DeepSparse Engine is a CPU runtime that delivers unprecedented performance by taking advantage of -natural sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads. -It is focused on model deployment and scaling machine learning pipelines, -fitting seamlessly into your existing deployments as an inference backend. +The DeepSparse Engine is a CPU runtime that delivers GPU-class performance by taking advantage of sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads. +It is focused on model deployment and scaling machine learning pipelines, fitting seamlessly into your existing deployments as an inference backend. + +`This repository `_ includes package APIs along with examples to quickly get started benchmarking and inferencing sparse models. + +Sparsification +============== + +Sparsification is the process of taking a trained deep learning model and removing redundant information from the overprecise and over-parameterized network resulting in a faster and smaller model. +Techniques for sparsification are all encompassing including everything from inducing sparsity using `pruning `_ and `quantization `_ to enabling naturally occurring sparsity using `activation sparsity `_ or `winograd/FFT `_. +When implemented correctly, these techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics. +For example, pruning plus quantization can give over `7x improvements in performance `_ while recovering to nearly the same baseline accuracy. + +The Deep Sparse product suite builds on top of sparsification enabling you to easily apply the techniques to your datasets and models using recipe-driven approaches. +Recipes encode the directions for how to sparsify a model into a simple, easily editable format. +- Download a sparsification recipe and sparsified model from the `SparseZoo `_. +- Alternatively, create a recipe for your model using `Sparsify `_. +- Apply your recipe with only a few lines of code using `SparseML `_. +- Finally, for GPU-level performance on CPUs, deploy your sparse-quantized model with the `DeepSparse Engine `_. -`This GitHub repository `_ includes package APIs along with examples to quickly get started learning about and -actually running sparse models. + +**Full Deep Sparse product flow:** + + Compatibility ============= -The DeepSparse Engine ingests models in the `ONNX `_ format, -allowing for compatibility with `PyTorch `_, -`TensorFlow `_, `Keras `_, -and `many other frameworks `_ that support it. +The DeepSparse Engine ingests models in the `ONNX `_ format, +allowing for compatibility with `PyTorch `_, +`TensorFlow `_, `Keras `_, +and `many other frameworks `_ that support it. This reduces the extra work of preparing your trained model for inference to just one step of exporting. 
-Related Products -================ - -- `SparseZoo `_: - Neural network model repository for highly sparse models and optimization recipes -- `SparseML `_: - Libraries for state-of-the-art deep neural network optimization algorithms, - enabling simple pipelines integration with a few lines of code -- `Sparsify `_: - Easy-to-use autoML interface to optimize deep neural networks for - better inference performance and a smaller footprint - Resources and Learning More =========================== -- `SparseZoo Documentation `_ -- `SparseML Documentation `_ -- `Sparsify Documentation `_ -- `Neural Magic Blog `_, - `Resources `_, - `Website `_ +- `SparseZoo Documentation `_ +- `SparseML Documentation `_ +- `Sparsify Documentation `_ +- `Neural Magic Blog `_, + `Resources `_, + `Website `_ Release History =============== Official builds are hosted on PyPi -- stable: `deepsparse `_ -- nightly (dev): `deepsparse-nightly `_ +- stable: `deepsparse `_ +- nightly (dev): `deepsparse-nightly `_ Additionally, more information can be found via -`GitHub Releases `_. +`GitHub Releases `_. .. toctree:: :maxdepth: 3 @@ -120,8 +126,9 @@ Additionally, more information can be found via api/deepsparse .. toctree:: - :maxdepth: 2 - :caption: Help and Support + :maxdepth: 3 + :caption: Help Bugs, Feature Requests Support, General Q&A + Neural Magic Docs \ No newline at end of file diff --git a/deepsparse/_sources/quicktour.md.txt b/deepsparse/_sources/quicktour.md.txt index 432c267f70e..7bb3c94a1ac 100644 --- a/deepsparse/_sources/quicktour.md.txt +++ b/deepsparse/_sources/quicktour.md.txt @@ -16,24 +16,22 @@ limitations under the License. ## Quick Tour -To expedite inference and benchmarking on real models, we include the `sparsezoo` package. -[SparseZoo](https://github.com/neuralmagic/sparsezoo) hosts inference optimized models, -trained on repeatable optimization recipes using state-of-the-art techniques from -[SparseML](https://github.com/neuralmagic/sparseml). +To expedite inference and benchmarking on real models, we include the `sparsezoo` package. [SparseZoo](https://github.com/neuralmagic/sparsezoo) hosts inference-optimized models, trained on repeatable sparsification recipes using state-of-the-art techniques from [SparseML](https://github.com/neuralmagic/sparseml). ### Quickstart with SparseZoo ONNX Models -**MobileNetV1 Dense** +**ResNet-50 Dense** -Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense MobileNetV1 from SparseZoo. +Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense ResNet-50 from SparseZoo. ```python from deepsparse import compile_model from sparsezoo.models import classification + batch_size = 64 # Download model and compile as optimized executable for your machine -model = classification.mobilenet_v1() +model = classification.resnet_50() engine = compile_model(model, batch_size=batch_size) # Fetch sample input and predict output using engine @@ -41,46 +39,68 @@ inputs = model.data_inputs.sample_batch(batch_size=batch_size) outputs, inference_time = engine.timed_run(inputs) ``` -**MobileNetV1 Optimized** +**ResNet-50 Sparsified** -When exploring available optimized models, you can use the `Zoo.search_optimized_models` -utility to find models that share a base. +When exploring available optimized models, you can use the `Zoo.search_optimized_models` utility to find models that share a base. -Let us try this on the dense MobileNetV1 to see what is available. 
+Try this on the dense ResNet-50 to see what is available: ```python from sparsezoo import Zoo from sparsezoo.models import classification -print(Zoo.search_optimized_models(classification.mobilenet_v1())) + +model = classification.resnet_50() +print(Zoo.search_optimized_models(model)) ``` + Output: -``` -[Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/base-none), - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-conservative), - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-moderate), - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned_quant-moderate)] + +```shell +[ + Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none), + Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-conservative), + Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-moderate), + Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate), + Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet-augmented/pruned_quant-aggressive) +] ``` -Great. We can see there are two pruned versions targeting FP32, -`conservative` at 100% and `moderate` at >= 99% of baseline accuracy. -There is also a `pruned_quant` variant targeting INT8. +We can see there are two pruned versions targeting FP32 and two pruned, quantized versions targeting INT8. +The `conservative`, `moderate`, and `aggressive` tags recover to 100%, >=99%, and <99% of baseline accuracy respectively. -Let's say you want to evaluate best performance on FP32 and are okay with a small drop in accuracy, -so we can choose `pruned-moderate` over `pruned-conservative`. +For a version of ResNet-50 that recovers close to the baseline and is very performant, choose the pruned_quant-moderate model. +This model will run [nearly 7x faster](https://neuralmagic.com/blog/benchmark-resnet50-with-deepsparse) than the baseline model on a compatible CPU (with the VNNI instruction set enabled). +For hardware compatibility, see the Hardware Support section. ```python from deepsparse import compile_model -from sparsezoo.models import classification -batch_size = 64 - -model = classification.mobilenet_v1(optim_name="pruned", optim_category="moderate") -engine = compile_model(model, batch_size=batch_size) +import numpy -inputs = model.data_inputs.sample_batch(batch_size=batch_size) -outputs, inference_time = engine.timed_run(inputs) +batch_size = 64 +sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)] + +# run baseline benchmarking +engine_base = compile_model( + model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none", + batch_size=batch_size, +) +benchmarks_base = engine_base.benchmark(sample_inputs) +print(benchmarks_base) + +# run sparse benchmarking +engine_sparse = compile_model( + model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate", + batch_size=batch_size, +) +if not engine_sparse.cpu_vnni: + print("WARNING: VNNI instructions not detected, quantization speedup not well supported") +benchmarks_sparse = engine_sparse.benchmark(sample_inputs) +print(benchmarks_sparse) + +print(f"Speedup: {benchmarks_sparse.items_per_second / benchmarks_base.items_per_second:.2f}x") ``` -### Quickstart with custom ONNX models +### Quickstart with Custom ONNX Models We accept ONNX files for custom models, too. Simply plug in your model to compare performance with other solutions. 
diff --git a/deepsparse/api/deepsparse.html b/deepsparse/api/deepsparse.html
index ceb4a956006..7f2be874cd4 100644
--- a/deepsparse/api/deepsparse.html
+++ b/deepsparse/api/deepsparse.html
@@ -123,10 +123,11 @@
-

Help and Support

+

Help

@@ -592,8 +593,9 @@

Submodules
Parameters
    -
  • model – Either a path to the model’s onnx file, a sparsezoo Model object, -or a sparsezoo ONNX File object that defines the neural network

  • +
  • model – Either a path to the model’s onnx file, a SparseZoo model stub +prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File +object that defines the neural network

  • batch_size – The batch size of the inputs to be used with the engine

  • num_cores – The number of physical cores to run the model on. Pass None or 0 to run on the max number of cores @@ -614,7 +616,7 @@

    Submodules
    -benchmark(inp: List[numpy.ndarray], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False)deepsparse.benchmark.BenchmarkResults[source]
    +benchmark(inp: List[numpy.ndarray], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False)deepsparse.benchmark.BenchmarkResults[source]

    A convenience function for quickly benchmarking the instantiated model on a given input in the DeepSparse Engine. After executing, will return the summary statistics for benchmarking.

    @@ -633,6 +635,7 @@

    SubmodulesReturns @@ -643,7 +646,7 @@

    Submodules
    -benchmark_loader(loader: Iterable[List[numpy.ndarray]], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False)deepsparse.benchmark.BenchmarkResults[source]
    +benchmark_loader(loader: Iterable[List[numpy.ndarray]], num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False)deepsparse.benchmark.BenchmarkResults[source]

    A convenience function for quickly benchmarking the instantiated model on a give DataLoader in the DeepSparse Engine. After executing, will return the summary statistics for benchmarking.

    @@ -662,6 +665,7 @@

    SubmodulesReturns @@ -848,9 +852,9 @@

    Submodules
    Parameters
      -
    • model – Either a path to the model’s onnx file, a sparsezoo Model object, -or a sparsezoo ONNX File object that defines the neural network -graph definition to analyze

    • +
    • model – Either a path to the model’s onnx file, a SparseZoo model stub +prefixed by ‘zoo:’, a SparseZoo Model object, or a SparseZoo ONNX File +object that defines the neural network graph definition to analyze

    • inp – The list of inputs to pass to the engine for analyzing inference. The expected order is the inputs order as defined in the ONNX graph.

    • batch_size – The batch size of the inputs to be used with the model

    • @@ -881,7 +885,7 @@

      Submodules
      -deepsparse.engine.benchmark_model(model: Union[str, sparsezoo.objects.model.Model, sparsezoo.objects.file.File], inp: List[numpy.ndarray], batch_size: int = 1, num_cores: Optional[int] = None, num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False)deepsparse.benchmark.BenchmarkResults[source]
      +deepsparse.engine.benchmark_model(model: Union[str, sparsezoo.objects.model.Model, sparsezoo.objects.file.File], inp: List[numpy.ndarray], batch_size: int = 1, num_cores: Optional[int] = None, num_iterations: int = 20, num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False)deepsparse.benchmark.BenchmarkResults[source]

      Convenience function to benchmark a model in the DeepSparse Engine from an ONNX file for inference. Gives defaults of batch_size == 1 and num_cores == None @@ -889,8 +893,9 @@

      Submodules
      Parameters
      -

      Help and Support

      +

      Help

diff --git a/deepsparse/api/modules.html b/deepsparse/api/modules.html
index fde843cca53..2df365f5890 100644
--- a/deepsparse/api/modules.html
+++ b/deepsparse/api/modules.html
@@ -108,10 +108,11 @@
-

      Help and Support

      +

      Help

diff --git a/deepsparse/genindex.html b/deepsparse/genindex.html
index 3845ba31a31..01453f7f562 100644
--- a/deepsparse/genindex.html
+++ b/deepsparse/genindex.html
@@ -108,10 +108,11 @@
-

      Help and Support

      +

      Help

diff --git a/deepsparse/index.html b/deepsparse/index.html
index 3b72f96f8d8..6ae56065d9e 100644
--- a/deepsparse/index.html
+++ b/deepsparse/index.html
@@ -109,10 +109,11 @@
-

      Help and Support

      +

      Help

      @@ -182,13 +183,14 @@

      DeepSparse 0.1

      -

      CPU inference engine that delivers unprecedented performance for sparse models.

      +

      Neural network inference engine that delivers GPU-class performance for sparsified models on CPUs

      -GitHub - - - GitHub - + + GitHub + + + GitHub + Documentation @@ -209,58 +211,57 @@

      DeepSparse 0.1

      Overview

      -

      The DeepSparse Engine is a CPU runtime that delivers unprecedented performance by taking advantage of -natural sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads. -It is focused on model deployment and scaling machine learning pipelines, -fitting seamlessly into your existing deployments as an inference backend.

      -

      This GitHub repository includes package APIs along with examples to quickly get started learning about and -actually running sparse models.

      +

The DeepSparse Engine is a CPU runtime that delivers GPU-class performance by taking advantage of sparsity within neural networks to reduce compute required as well as accelerate memory bound workloads.
+It is focused on model deployment and scaling machine learning pipelines, fitting seamlessly into your existing deployments as an inference backend.

      +

      This repository includes package APIs along with examples to quickly get started benchmarking and inferencing sparse models.

      +

      +
      +

      Sparsification

      +

Sparsification is the process of taking a trained deep learning model and removing redundant information from the over-precise and over-parameterized network, resulting in a faster and smaller model.
+Techniques for sparsification are all-encompassing, including everything from inducing sparsity using pruning and quantization to enabling naturally occurring sparsity using activation sparsity or winograd/FFT.
+When implemented correctly, these techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics.
+For example, pruning plus quantization can give over 7x improvements in performance while recovering to nearly the same baseline accuracy.

      +

The Deep Sparse product suite builds on top of sparsification enabling you to easily apply the techniques to your datasets and models using recipe-driven approaches.
+Recipes encode the directions for how to sparsify a model into a simple, easily editable format.
+- Download a sparsification recipe and sparsified model from the SparseZoo.
+- Alternatively, create a recipe for your model using Sparsify.
+- Apply your recipe with only a few lines of code using SparseML.
+- Finally, for GPU-level performance on CPUs, deploy your sparse-quantized model with the DeepSparse Engine.

      +

      Full Deep Sparse product flow:

      +

<img src="https://docs.neuralmagic.com/docs/source/sparsification/flow-overview.svg" width="960px">

      Compatibility

      -

      The DeepSparse Engine ingests models in the ONNX format, -allowing for compatibility with PyTorch, -TensorFlow, Keras, -and many other frameworks that support it. +

      The DeepSparse Engine ingests models in the ONNX format, +allowing for compatibility with PyTorch, +TensorFlow, Keras, +and many other frameworks that support it. This reduces the extra work of preparing your trained model for inference to just one step of exporting.

      -

      Release History

      Official builds are hosted on PyPi -- stable: deepsparse -- nightly (dev): deepsparse-nightly

      +- stable: deepsparse +- nightly (dev): deepsparse-nightly

      Additionally, more information can be found via -GitHub Releases.

      +GitHub Releases.

diff --git a/deepsparse/py-modindex.html b/deepsparse/py-modindex.html
index f1b91395df2..2d5371b8424 100644
--- a/deepsparse/py-modindex.html
+++ b/deepsparse/py-modindex.html
@@ -111,10 +111,11 @@
-

      Help and Support

      +

      Help

diff --git a/deepsparse/quicktour.html b/deepsparse/quicktour.html
index 21c2452ec90..df0091d8ae5 100644
--- a/deepsparse/quicktour.html
+++ b/deepsparse/quicktour.html
@@ -100,7 +100,7 @@
      • Quick Tour
      • Installation
      • @@ -114,10 +114,11 @@ -

        Help and Support

        +

        Help

        @@ -201,20 +202,18 @@ limitations under the License. -->

        Quick Tour

        -

        To expedite inference and benchmarking on real models, we include the sparsezoo package. -SparseZoo hosts inference optimized models, -trained on repeatable optimization recipes using state-of-the-art techniques from -SparseML.

        +

        To expedite inference and benchmarking on real models, we include the sparsezoo package. SparseZoo hosts inference-optimized models, trained on repeatable sparsification recipes using state-of-the-art techniques from SparseML.

        Quickstart with SparseZoo ONNX Models

        -

        MobileNetV1 Dense

        -

        Here is how to quickly perform inference with DeepSparse Engine on a pre-trained dense MobileNetV1 from SparseZoo.

        +

        ResNet-50 Dense

        +

Here is how to quickly perform inference with the DeepSparse Engine on a pre-trained dense ResNet-50 from SparseZoo.

        from deepsparse import compile_model
         from sparsezoo.models import classification
        +
         batch_size = 64
         
         # Download model and compile as optimized executable for your machine
        -model = classification.mobilenet_v1()
        +model = classification.resnet_50()
         engine = compile_model(model, batch_size=batch_size)
         
         # Fetch sample input and predict output using engine
        @@ -222,41 +221,61 @@ 

        Quickstart with SparseZoo ONNX Modelsoutputs, inference_time = engine.timed_run(inputs)

        -

        MobileNetV1 Optimized

        -

        When exploring available optimized models, you can use the Zoo.search_optimized_models -utility to find models that share a base.

        -

        Let us try this on the dense MobileNetV1 to see what is available.

        +

        ResNet-50 Sparsified

        +

        When exploring available optimized models, you can use the Zoo.search_optimized_models utility to find models that share a base.

        +

        Try this on the dense ResNet-50 to see what is available:

        from sparsezoo import Zoo
         from sparsezoo.models import classification
        -print(Zoo.search_optimized_models(classification.mobilenet_v1()))
        +
        +model = classification.resnet_50()
        +print(Zoo.search_optimized_models(model))
         

        Output:

        -
        [Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/base-none),
        - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-conservative),
        - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned-moderate),
        - Model(stub=cv/classification/mobilenet_v1-1.0/pytorch/sparseml/imagenet/pruned_quant-moderate)]
        +
        [
        +    Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none), 
        +    Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-conservative), 
        +    Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned-moderate), 
        +    Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate), 
        +    Model(stub=cv/classification/resnet_v1-50/pytorch/sparseml/imagenet-augmented/pruned_quant-aggressive)
        +]
         
        -

        Great. We can see there are two pruned versions targeting FP32, -conservative at 100% and moderate at >= 99% of baseline accuracy. -There is also a pruned_quant variant targeting INT8.

        -

        Let’s say you want to evaluate best performance on FP32 and are okay with a small drop in accuracy, -so we can choose pruned-moderate over pruned-conservative.

        +

We can see there are two pruned versions targeting FP32 and two pruned, quantized versions targeting INT8.
+The conservative, moderate, and aggressive tags recover to 100%, >=99%, and <99% of baseline accuracy, respectively.

        +

        For a version of ResNet-50 that recovers close to the baseline and is very performant, choose the pruned_quant-moderate model. +This model will run nearly 7x faster than the baseline model on a compatible CPU (with the VNNI instruction set enabled). +For hardware compatibility, see the Hardware Support section.

        from deepsparse import compile_model
        -from sparsezoo.models import classification
        -batch_size = 64
        +import numpy
         
        -model = classification.mobilenet_v1(optim_name="pruned", optim_category="moderate")
        -engine = compile_model(model, batch_size=batch_size)
        -
        -inputs = model.data_inputs.sample_batch(batch_size=batch_size)
        -outputs, inference_time = engine.timed_run(inputs)
        +batch_size = 64
        +sample_inputs = [numpy.random.randn(batch_size, 3, 224, 224).astype(numpy.float32)]
        +
        +# run baseline benchmarking
        +engine_base = compile_model(
        +    model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none", 
        +    batch_size=batch_size,
        +)
        +benchmarks_base = engine_base.benchmark(sample_inputs)
        +print(benchmarks_base)
        +
        +# run sparse benchmarking
        +engine_sparse = compile_model(
        +    model="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate", 
        +    batch_size=batch_size,
        +)
        +if not engine_sparse.cpu_vnni:
        +    print("WARNING: VNNI instructions not detected, quantization speedup not well supported")
        +benchmarks_sparse = engine_sparse.benchmark(sample_inputs)
        +print(benchmarks_sparse)
        +
        +print(f"Speedup: {benchmarks_sparse.items_per_second / benchmarks_base.items_per_second:.2f}x")
         
        -

        Quickstart with custom ONNX models

        +

        Quickstart with Custom ONNX Models

        We accept ONNX files for custom models, too. Simply plug in your model to compare performance with other solutions.

        > wget https://github.com/onnx/models/raw/master/vision/classification/mobilenet/model/mobilenetv2-7.onnx
         Saving to: ‘mobilenetv2-7.onnx’
        diff --git a/deepsparse/search.html b/deepsparse/search.html
        index 616e245c014..fc89856429a 100644
        --- a/deepsparse/search.html
        +++ b/deepsparse/search.html
        @@ -111,10 +111,11 @@
         
        -

        Help and Support

        +

        Help

diff --git a/deepsparse/searchindex.js b/deepsparse/searchindex.js
index 3936c2ff966..86fe5c57b7f 100644
e_bas:10,engine_spars:10,ensur:0,entir:[0,3],environ:[0,3,6,9],error:3,evalu:[],everyth:8,exampl:[0,5,6,8,9],except:0,execut:[0,4,5,6,10],exist:8,expect:0,expedit:10,explain:[3,4],explor:[9,10],extens:7,extern:1,extra:[0,8],fals:0,faster:[8,10],fatal:3,featur:8,fetch:10,few:8,fft:8,file:[0,1,3,10],filter:4,find:[5,10],fine:6,finish:4,first:[0,3,6],fit:8,fix:0,float32:[0,10],flow:8,focus:8,follow:[3,4,6],footprint:[],forc:[0,6],format:[0,8],forward:0,found:8,four:6,fp32:10,framework:8,from:[0,3,4,6,8,10],full:8,func:0,further:6,gather:1,gener:[0,1,3,4,8,10],generate_random_input:[1,10],get:[3,8],get_external_input:1,get_external_output:1,get_input_nam:1,get_output_nam:1,getcap:4,github:[8,10],give:[0,8],given:[0,6],gpu:[0,8],grain:6,graph:[0,4,5],graph_util:[3,4],graphview:[3,4],great:[],greater:[3,4],gt_output:1,guid:3,guidanc:[5,8],hardwar:[6,8,10],has:6,haswel:7,have:0,height:0,help:8,here:[6,7,10],highli:7,hit:[0,3],home:[3,4],host:[8,10],how:[0,3,8,10],howev:6,hpp:4,http:[3,8,10],hurt:3,hyper:6,ideal:3,idl:6,ignor:4,imagenet:10,img:8,implement:8,impos:0,imposed_a:0,imposed_k:0,improv:8,includ:[3,4,8,10],include_input:0,include_output:0,increas:3,incur:0,indic:3,induc:8,infer:[0,3,8,10],inferenc:8,inference_tim:10,info:4,inform:[0,3,8],ingest:8,init_deepsparse_lib:0,initi:3,inp:0,input:[0,1,3,4,10],input_data:4,instal:8,instanc:[0,3],instanti:0,instruct:[0,7,10],int16:0,int8:[0,10],intak:[3,4],integr:[],intel:[6,7],interfac:0,inventori:6,isinst:0,issu:3,item:0,items_per_second:[0,10],iter:0,its:1,jdoe:[3,4],jit:[3,4],just:8,keep:[6,9],kera:8,kernel:[0,3,4,7],lake:7,layer:0,learn:3,legal:3,let:[],level:[0,3,5,8],lib:[2,8],librari:[],like:[3,9],limit:8,line:[3,8],linux:[0,7,9],list:[0,1,3],load:3,loader:0,local:0,locat:[3,6],log:[0,2,5,8],log_init:1,logic:6,look:3,machin:[0,3,8,10],macro:3,made:3,magic:[0,3,8],mai:[3,6],main:3,major:[3,6],make:3,man:6,manag:3,mani:[3,8],manual:3,map:6,mapped_run:0,master:10,match:[0,1,4],max:[0,1],maxim:3,maximum:6,mean:0,median:0,membind:6,memori:[0,6,8],method:0,metric:8,microarchitectur:7,migrat:[3,6],millisecond:0,minim:0,mobilenet:10,mobilenet_v1:[],mobilenetv1:[],mobilenetv2:10,mode:6,model:[0,1,3,4,8],model_path:0,moder:10,modifi:1,modul:[2,8],monitor:3,monopol:3,more:[0,6],most:0,ms_per_batch:0,ms_per_item:0,multi:[0,6],multipl:6,must:0,name:[0,1],name_of_log:3,natur:8,ndarrai:0,nearli:[8,10],need:6,network:[0,3,4,8],neural:[0,3,8],neuralmag:[3,4,8],neuralmagic_cr:4,neuralmagic_execut:4,newer:7,nightli:8,nm_arch:0,nm_bind_threads_to_cor:[3,6],nm_exec_test_it:4,nm_execution_provid:[3,4],nm_logging_level:[3,4],nm_ogging_level:3,nm_ort:[3,4],nm_subgraph:3,nm_subgraph_1:[3,4],nmexecutionprovid:4,node:[4,5],non:4,none:[0,1,4,10],normal:3,notabl:3,note:[0,3,5,8],notebook:9,num_batch:0,num_cor:0,num_item:0,num_iter:0,num_socket:0,num_warmup_iter:0,numactl:[5,8],number:[0,5,6],numpi:[0,1,10],nyann:[3,4],object:[0,1],obtain:5,occur:8,offici:8,okai:[],onc:3,one:[0,3,6,8],onli:[0,3,4,6,8],onnx:[0,2,4,8],onnx_filenam:4,onnx_filepath:[1,10],onnxruntime_neuralmag:[3,4],openmp:3,oper:[3,4,7],ops:[3,4],optim:[0,4,6,7,8,10],optim_categori:[],optim_nam:[],optimization_level:0,option:[0,1],order:[0,3,9],ort:[3,4],other:[0,3,6,8,10],otherwis:0,our:5,out:[0,3,6],output:[0,1,3,4,10],output_data:4,outsid:6,over:[0,7,8],overprecis:8,overrid:[0,1],override_onnx_batch_s:1,own:6,packag:[2,8,10],pad:[3,4],page:6,pair:1,param:1,paramet:[0,3,6],parameter:8,pars:[4,5,8],part:3,particular:6,pass:0,path:[0,1],pattern:4,per:[0,3,6],percentag:[0,3],perform:[0,5,6,8,10],physcpubind:6
,physic:[0,6],pin:[3,5,8],pinpoint:3,pip:9,pipelin:[3,8],planner:4,pleas:0,plu:8,plug:10,point:3,polici:3,pool:3,portion:[3,4],possibl:3,practic:3,pre:10,predict:10,prefer:6,prefix:0,prepar:8,prevent:[3,6],print:[3,4,10],privaci:3,process:[3,6,8],produc:[5,8],product:8,progress:0,properti:0,provid:[1,3,4],prunabl:0,prune:[0,8,10],pruned_qu:10,pypi:8,python:[0,3,9],pytorch:[8,10],quant:10,quantiz:[0,7,8,10],quick:8,quickli:[0,8,10],quickstart:8,rais:0,rand:0,randn:10,random:[0,1,4,10],rang:6,ratio:4,raw:10,real:[3,10],recip:[8,10],recommend:[0,3,7,9],record:0,recov:[8,10],reduc:8,redund:8,refer:1,regular:0,rel:1,relat:0,relu:[0,3,4],remov:8,repeat:[0,10],repo:4,report:3,repositori:[8,9],request:8,requir:[4,6,8,9],reshap:[3,4],resnet:10,resnet_50:10,resnet_v1:10,resolut:[3,4],resourc:[3,5],respect:10,restrict:6,result:[0,3,6,8],reveal:3,review:3,rewrit:1,rtol:1,run:[0,3,4,6,7,10],run_model:3,runtim:[4,5,8],sai:[],same:[6,8],sampl:[3,10],sample_batch:10,sample_input:10,save:[1,10],scale:8,scarc:3,scienc:3,script:3,seamlessli:8,search_optimized_model:10,second:[0,6],section:10,see:[0,3,6,10],seek:3,select:6,self:3,separ:6,serv:3,server:6,set:[0,3,6,10],setup:0,sever:3,shape:[1,3,4],share:[3,6,10],shell:3,ship:3,should:6,show:[3,6],show_progress:0,shuffl:0,signific:0,significantli:8,similar:1,similarli:6,simpl:8,simpli:10,simplif:3,sinc:6,singl:[0,6],size:[0,1,4,5],skylak:7,small:[],smaller:8,smt:6,socket:[0,6],softwar:3,solut:10,some:[0,3,7],sourc:[0,1,8],spars:[7,8,10],sparseml:[8,10],sparsezoo:[0,8],sparsif:10,sparsifi:[8,10],sparsiti:[0,7,8],specif:[0,6,7],specifi:[3,6],speedup:[0,10],split:3,src:[3,4,8],stabl:8,standard:[0,6],start:[0,3,4,6,8],startup:6,state:[0,3,10],statement:3,statist:[0,5],stderr:3,steadi:0,step:8,store:0,str:[0,1],straightforward:6,stride:[3,4],string:1,structur:0,stub:[0,10],subgraph:[4,5],submodul:[2,8],subpackag:[2,8],suffici:1,suit:8,summari:0,super_resolut:4,support:[0,4,5,8,10],supported_subgraph:[3,4],svg:8,system:[0,3,4,6,7,9],tabl:7,tag:10,take:[0,8],target:10,tbb:3,technic:3,techniqu:[8,10],tensor:[0,1,3],tensorflow:8,test:[4,9],test_1:4,than:[3,4,6,10],thei:[1,3],them:3,thi:[0,3,6,8,9,10],thread:[3,5,8],through:[0,6],tiger:7,time:[0,5],time_elaps:0,time_end:0,time_start:0,timed_run:[0,10],toler:1,too:10,took:0,top:8,torch:[3,4],total:4,tour:8,tradit:3,train:[3,8,10],translat:[3,4],transpos:[3,4],troubleshoot:3,truncat:3,tune:[5,8],tupl:0,two:[1,6,10],txt:3,type:[0,1,3],union:0,uniqu:3,unit:4,unlik:3,unoptim:3,unpreced:[],unsupport:[3,4],use:[0,3,6,10],used:[0,6],using:[0,3,4,6,8,9,10],usual:[1,6],util:[0,2,3,4,5,8,10],val_inp:0,valid:[0,7],validate_minimum_supported_fract:[3,4],valu:[3,4],variabl:[0,3,6],variant:[],variou:3,vector:0,verbos:[3,5,8],veri:[3,10],verify_output:1,version:[2,4,8,10],via:8,view:5,virtual:9,vision:10,vnni:[0,7,10],wai:6,wand:4,want:3,warm:0,warn:[3,4,10],websit:8,weight:[0,3,4],well:[8,10],were:0,wget:10,what:[0,3,10],when:[0,6,8,10],where:[3,4,6],whether:3,which:[3,6],whole:5,width:[0,8],winograd:8,within:[3,8],work:[6,7,8],workload:8,would:[3,6,9],x86:7,yet:0,you:[0,3,6,8,9,10],your:[3,8,9,10],zen:7,zoo:[0,10]},titles:["deepsparse package","deepsparse.utils package","deepsparse","Logging Guidance for Diagnostics and Debugging","Example Log, Verbose Level = diagnose","Debugging and Optimizing","Using the numactl Utility to Control Resource Utilization with the DeepSparse Engine","Hardware Support","DeepSparse 0.1","Installation","Quick 
Tour"],titleterms:{Using:6,addit:6,amount:3,batch:3,benchmark:0,compat:8,compil:3,content:[0,1],control:[3,6],core:3,cpu:0,custom:10,data:1,debug:[3,5],deepspars:[0,1,2,3,6,8],determin:3,diagnos:4,diagnost:3,each:3,enabl:3,engin:[0,3,6],exampl:[3,4],execut:3,find:3,graph:3,guidanc:3,hardwar:7,histori:8,instal:9,learn:8,level:4,lib:0,log:[1,3,4],model:10,modul:[0,1],more:8,node:3,note:6,numactl:6,number:3,obtain:3,onnx:[1,10],optim:[3,5],our:3,overview:8,packag:[0,1],pars:3,perform:3,pin:6,produc:3,product:[],quick:10,quickstart:10,relat:[],releas:8,resourc:[6,8],runtim:3,size:3,sparsezoo:10,sparsif:8,statist:3,subgraph:3,submodul:[0,1],subpackag:0,support:[3,7],thread:6,time:3,tour:10,tune:3,util:[1,6],verbos:4,version:0,view:3,whole:3}}) \ No newline at end of file