Use INT8 input for quantized models (#201)
* Added a call to the function that skips input quantization when the model is quantized

* Pass model path as string
anmarques committed Apr 19, 2023
1 parent 56e2d04 commit 92bfd81
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions utils/neuralmagic/utils.py
@@ -7,6 +7,7 @@
 import torch
 import yaml
 from sparseml.pytorch.optim import ScheduledModifierManager
+from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize
 from sparseml.pytorch.utils import ModuleExporter, download_framework_model_by_recipe_type
 from sparseml.onnx.utils import override_model_input_shape
 from sparsezoo import Model
@@ -214,6 +215,11 @@ def neuralmagic_onnx_export(

     saved_model_path = save_dir / onnx_file_name

+    try:
+        skip_onnx_input_quantize(str(saved_model_path), str(saved_model_path))
+    except Exception:
+        pass
+
     # set model input shape to a static shape (graph is still dynamic compatible)
     # for performance with deepsparse engine + extractable shape for analysis
     sample_data_shape = list(sample_data.shape)
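With this change, skip_onnx_input_quantize rewrites the exported graph so that the leading input-quantization step is removed and a quantized model accepts integer (UINT8/INT8) input directly, rather than float32 that is quantized inside the graph. The broad try/except lets the call fail silently, presumably so exports of non-quantized models, where there is no input quantize node to remove, proceed unchanged.

A minimal sketch of how one might verify the effect on an exported model, assuming a hypothetical file yolov5s-quant.onnx and a 1x3x640x640 input (the path, shape, and dtype are illustrative, not from the commit):

    # Hypothetical check, not part of the commit.
    import numpy as np
    import onnx
    import onnxruntime as ort

    model = onnx.load("yolov5s-quant.onnx")  # assumed export path
    elem_type = model.graph.input[0].type.tensor_type.elem_type
    # Expect an integer element type (e.g. UINT8) instead of FLOAT
    # once the input quantization has been skipped.
    print(onnx.TensorProto.DataType.Name(elem_type))

    session = ort.InferenceSession(
        "yolov5s-quant.onnx", providers=["CPUExecutionProvider"]
    )
    inp = session.get_inputs()[0]
    image = np.random.randint(0, 256, size=(1, 3, 640, 640), dtype=np.uint8)
    outputs = session.run(None, {inp.name: image})

If the input type check still reports FLOAT, the exported model was likely not quantized, in which case the skipped call above is a no-op by design.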
