From 596b10cb9ad9f3a36b3cd73913e035d4cf392b01 Mon Sep 17 00:00:00 2001 From: mtairum Date: Wed, 29 Jan 2025 15:24:48 +0000 Subject: [PATCH] #0: Add new metric to the llama benchmark --- models/demos/llama3/demo/simple_text_demo.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/models/demos/llama3/demo/simple_text_demo.py b/models/demos/llama3/demo/simple_text_demo.py index 8f9015d3c6ae..67cb1e35e18c 100644 --- a/models/demos/llama3/demo/simple_text_demo.py +++ b/models/demos/llama3/demo/simple_text_demo.py @@ -724,6 +724,20 @@ def test_llama_demo_text( target=None, ) + # Also save the avg decode time over the first 128 iterations, skipping iteration 0 to exclude the compile time (i.e. averaged over the 127 iterations 1-127) + inference_decode_time_first_128 = sum( + profiler.get_duration(f"inference_decode_time_{i}") for i in range(1, 128) + ) + benchmark_data.add_measurement( + profiler, + 0, + "inference_decode", + "avg_decode_time_first_128", + inference_decode_time_first_128 * 1000 / 127, + step_warm_up_num_iterations=None, + target=None, + ) + benchmark_data.save_partial_run_json( profiler, run_type=f"{tt_device_name}-demo",