From 92bfd817377fc6f1bf9ae55069c6e830f4a61b70 Mon Sep 17 00:00:00 2001
From: Alexandre Marques <alexandre@neuralmagic.com>
Date: Wed, 19 Apr 2023 10:43:38 -0400
Subject: [PATCH] Use INT8 input for quantized models (#201)

* Added a call to skip_onnx_input_quantize, which skips the quantization of the input if the exported model is quantized

* Pass the model path to skip_onnx_input_quantize as a string
---
 utils/neuralmagic/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/utils/neuralmagic/utils.py b/utils/neuralmagic/utils.py
index a39f7b23da01..db5b0a24153e 100644
--- a/utils/neuralmagic/utils.py
+++ b/utils/neuralmagic/utils.py
@@ -7,6 +7,7 @@
 import torch
 import yaml
 from sparseml.pytorch.optim import ScheduledModifierManager
+from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize
 from sparseml.pytorch.utils import ModuleExporter, download_framework_model_by_recipe_type
 from sparseml.onnx.utils import override_model_input_shape
 from sparsezoo import Model
@@ -214,6 +215,11 @@ def neuralmagic_onnx_export(
 
     saved_model_path = save_dir / onnx_file_name
 
+    try:
+        skip_onnx_input_quantize(str(saved_model_path), str(saved_model_path))
+    except Exception:
+        pass
+
     # set model input shape to a static shape (graph is still dynamic compatible)
     # for performance with deepsparse engine + extractable shape for analysis
     sample_data_shape = list(sample_data.shape)