Use INT8 input for quantized models (#201)
* Added a call to the function that skips input quantization when the model is quantized

* Pass model path as string
anmarques committed Apr 19, 2023
1 parent 56e2d04 commit 92bfd81
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions utils/neuralmagic/utils.py
@@ -7,6 +7,7 @@
 import torch
 import yaml
 from sparseml.pytorch.optim import ScheduledModifierManager
+from sparseml.pytorch.sparsification.quantization import skip_onnx_input_quantize
 from sparseml.pytorch.utils import ModuleExporter, download_framework_model_by_recipe_type
 from sparseml.onnx.utils import override_model_input_shape
 from sparsezoo import Model
@@ -214,6 +215,11 @@ def neuralmagic_onnx_export(

     saved_model_path = save_dir / onnx_file_name

+    try:
+        skip_onnx_input_quantize(str(saved_model_path), str(saved_model_path))
+    except Exception:
+        pass
+
     # set model input shape to a static shape (graph is still dynamic compatible)
     # for performance with deepsparse engine + extractable shape for analysis
     sample_data_shape = list(sample_data.shape)
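With this change, skip_onnx_input_quantize rewrites the exported graph so that the leading input-quantization step is removed and a quantized model accepts integer (UINT8/INT8) input directly, rather than float32 that is quantized inside the graph. The broad try/except lets the call fail silently, presumably so exports of non-quantized models, where there is no input quantize node to remove, proceed unchanged.

A minimal sketch of how one might verify the effect on an exported model, assuming a hypothetical file yolov5s-quant.onnx and a 1x3x640x640 input (the path, shape, and dtype are illustrative, not from the commit):

    # Hypothetical check, not part of the commit.
    import numpy as np
    import onnx
    import onnxruntime as ort

    model = onnx.load("yolov5s-quant.onnx")  # assumed export path
    elem_type = model.graph.input[0].type.tensor_type.elem_type
    # Expect an integer element type (e.g. UINT8) instead of FLOAT
    # once the input quantization has been skipped.
    print(onnx.TensorProto.DataType.Name(elem_type))

    session = ort.InferenceSession(
        "yolov5s-quant.onnx", providers=["CPUExecutionProvider"]
    )
    inp = session.get_inputs()[0]
    image = np.random.randint(0, 256, size=(1, 3, 640, 640), dtype=np.uint8)
    outputs = session.run(None, {inp.name: image})

If the input type check still reports FLOAT, the exported model was likely not quantized, in which case the skipped call above is a no-op by design.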
