Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into feature/damian/generate_until
Browse files Browse the repository at this point in the history
  • Loading branch information
dbogunowicz committed Feb 5, 2024
2 parents b5a6d6d + 59e0602 commit d0698e7
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/deepsparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from .version import __version__, is_release
from .analytics import deepsparse_analytics as _analytics
from .subgraph_execute import *
from .analyze import analyze
from .evaluation.evaluator import evaluate

_analytics.send_event("python__init")
56 changes: 37 additions & 19 deletions src/deepsparse/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
ModelAnalysis,
NodeInferenceResult,
)
from sparsezoo.analyze.cli import analyze_options, analyze_performance_options
from sparsezoo.analyze.cli import (
DEEPSPARSE_ENGINE,
analyze_options,
analyze_performance_options,
)


_LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -74,21 +78,11 @@ def main(
)

_LOGGER.info("Starting Analysis ...")
analysis = ModelAnalysis.create(model_path)
_LOGGER.info("Analysis complete, collating results...")
scenario = BenchmarkScenario(
batch_size=batch_size_throughput,
num_cores=None,
engine=benchmark_engine,
)
performance_summary = run_benchmark_and_analysis(
onnx_model=model_to_path(model_path),
scenario=scenario,
)
analysis = analyze(model_path, batch_size_throughput, benchmark_engine)

by_types: bool = convert_to_bool(by_types)
by_layers: bool = convert_to_bool(by_layers)

analysis.benchmark_results = [performance_summary]
summary = analysis.summary(
by_types=by_types,
by_layers=by_layers,
Expand All @@ -103,13 +97,9 @@ def main(

print("Comparison Analysis:")
for model_to_compare in compare:
compare_model_analysis = ModelAnalysis.create(model_to_compare)
_LOGGER.info(f"Running Performance Analysis on {model_to_compare}")
performance_summary = run_benchmark_and_analysis(
onnx_model=model_to_path(model_to_compare),
scenario=scenario,
compare_model_analysis = analyze(
model_to_compare, batch_size_throughput, benchmark_engine
)
compare_model_analysis.benchmark_results = [performance_summary]
summary_comparison_model = compare_model_analysis.summary(
by_types=by_types,
by_layers=by_layers,
Expand All @@ -124,6 +114,34 @@ def main(
analysis.yaml(file_path=save)


def analyze(
    model_path,
    batch_size_throughput: int = 1,
    benchmark_engine: str = DEEPSPARSE_ENGINE,
) -> ModelAnalysis:
    """
    Build a `ModelAnalysis` for a model and attach throughput benchmark
    results to it.

    :param model_path: Local filepath to an ONNX model, or a SparseZoo stub
    :param batch_size_throughput: Batch size for throughput benchmark
    :param benchmark_engine: Benchmark engine to use, can be 'deepsparse' or
        'onnxruntime', defaults to 'deepsparse'
    :return: A `ModelAnalysis` object encapsulating the results of the analysis
    """
    # Benchmark configuration for the throughput measurement run
    benchmark_scenario = BenchmarkScenario(
        batch_size=batch_size_throughput,
        num_cores=None,
        engine=benchmark_engine,
    )

    model_analysis = ModelAnalysis.create(model_path)
    _LOGGER.info("Analysis complete, collating results...")

    throughput_summary = run_benchmark_and_analysis(
        onnx_model=model_to_path(model_path),
        scenario=benchmark_scenario,
    )
    model_analysis.benchmark_results = [throughput_summary]
    return model_analysis


def run_benchmark_and_analysis(
onnx_model: str,
scenario: BenchmarkScenario,
Expand Down
5 changes: 5 additions & 0 deletions src/deepsparse/benchmark/benchmark_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,11 @@ def benchmark_model(
if not disable_kv_cache_overrides:
if not sequence_length:
sequence_length = infer_sequence_length(model_path)
if not sequence_length:
raise ValueError(
"Unable to infer sequence length from model. "
"Specify it manually through `sequence_length` argument."
)
if input_ids_length > sequence_length:
raise ValueError(
f"input_ids_length: {input_ids_length} "
Expand Down
2 changes: 2 additions & 0 deletions src/deepsparse/evaluation/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
"model_path",
type=click.Path(dir_okay=True, file_okay=True),
required=True,
help="A path to a remote or local directory containing ONNX/torch model "
"(including all the auxiliary files) or a SparseZoo stub",
)
@click.option(
"-d",
Expand Down
6 changes: 4 additions & 2 deletions src/deepsparse/utils/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,8 @@ def has_model_kv_cache(model: Union[str, ModelProto]) -> bool:
def infer_sequence_length(model: Union[str, ModelProto]) -> int:
"""
:param model: model
:return: inferred sequence length of the model
:return: inferred sequence length of the model.
If unable to infer, return 0
"""
if not isinstance(model, ModelProto):
model = onnx.load(model, load_external_data=False)
Expand All @@ -623,9 +624,10 @@ def infer_sequence_length(model: Union[str, ModelProto]) -> int:
for idx, inp in enumerate(model.graph.input):
if inp.name == "attention_mask":
target_input_idx = idx
break
try:
# return shape of second dim if possible
target_input = model.graph.input[target_input_idx]
return target_input.type.tensor_type.shape.dim[1].dim_value
except Exception:
return 0 # unable to infer seq len
return 0

0 comments on commit d0698e7

Please sign in to comment.