From 92bfd817377fc6f1bf9ae55069c6e830f4a61b70 Mon Sep 17 00:00:00 2001 From: Alexandre Marques Date: Wed, 19 Apr 2023 10:43:38 -0400 Subject: [PATCH] Use INT8 input for quantized models (#201) * Added call to function that skips the quantization of the input if the model is quantized * Pass model path as string --- utils/neuralmagic/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/utils/neuralmagic/utils.py b/utils/neuralmagic/utils.py index a39f7b23da01..db5b0a24153e 100644 --- a/utils/neuralmagic/utils.py +++ b/utils/neuralmagic/utils.py @@ -7,6 +7,7 @@ import torch import yaml from sparseml.pytorch.optim import ScheduledModifierManager +from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize from sparseml.pytorch.utils import ModuleExporter, download_framework_model_by_recipe_type from sparseml.onnx.utils import override_model_input_shape from sparsezoo import Model @@ -214,6 +215,11 @@ def neuralmagic_onnx_export( saved_model_path = save_dir / onnx_file_name + try: + skip_onnx_input_quantize(str(saved_model_path), str(saved_model_path)) + except Exception: + pass + # set model input shape to a static shape (graph is still dynamic compatible) # for performance with deepsparse engine + extractable shape for analysis sample_data_shape = list(sample_data.shape)