diff --git a/tt_metal/tools/profiler/profiler.hpp b/tt_metal/tools/profiler/profiler.hpp index 78227d0e2c73..79bbf2a37777 100644 --- a/tt_metal/tools/profiler/profiler.hpp +++ b/tt_metal/tools/profiler/profiler.hpp @@ -11,6 +11,7 @@ #include #include "tt_metal/impl/buffers/buffer.hpp" +#include "tt_metal/impl/program/program.hpp" #include "llrt/llrt.hpp" #include "tools/profiler/profiler_state.hpp" #include "tools/profiler/common.hpp" @@ -101,6 +102,7 @@ class DeviceProfiler { //DRAM buffer for device side results std::shared_ptr output_dram_buffer = nullptr; + std::shared_ptr sync_program = nullptr; // Device-core Syncdata std::map> device_core_sync_info; diff --git a/tt_metal/tools/profiler/tt_metal_profiler.cpp b/tt_metal/tools/profiler/tt_metal_profiler.cpp index 9920dd40ad25..512a4991294a 100644 --- a/tt_metal/tools/profiler/tt_metal_profiler.cpp +++ b/tt_metal/tools/profiler/tt_metal_profiler.cpp @@ -43,7 +43,7 @@ std::unordered_map smallestHostime; constexpr CoreCoord SYNC_CORE = {0,0}; -void syncDeviceHost(Device *device, CoreCoord logical_core, bool doHeader) +void syncDeviceHost(Device *device, CoreCoord logical_core, std::shared_ptr &sync_program, bool doHeader) { if (!tt::llrt::OptionsG.get_profiler_sync_enabled()) return; ZoneScopedC(tracy::Color::Tomato3); @@ -53,23 +53,24 @@ void syncDeviceHost(Device *device, CoreCoord logical_core, bool doHeader) deviceHostTimePair.emplace(device_id, (std::vector >){}); smallestHostime.emplace(device_id, 0); - tt_metal::Program program = tt_metal::CreateProgram(); - constexpr uint16_t sampleCount = 249; - std::map kernel_defines = { - {"SAMPLE_COUNT", std::to_string(sampleCount)}, - }; - - tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel( - program, "tt_metal/tools/profiler/sync/sync_kernel.cpp", - logical_core, - tt_metal::DataMovementConfig{ - .processor = tt_metal::DataMovementProcessor::RISCV_0, - .noc = tt_metal::NOC::RISCV_0_default, - .defines = kernel_defines} - ); - - EnqueueProgram(device->command_queue(), program, false); + if (sync_program == nullptr) { + sync_program = std::make_shared(); + + std::map kernel_defines = { + {"SAMPLE_COUNT", std::to_string(sampleCount)}, + }; + + tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel( + *sync_program, "tt_metal/tools/profiler/sync/sync_kernel.cpp", + logical_core, + tt_metal::DataMovementConfig{ + .processor = tt_metal::DataMovementProcessor::RISCV_0, + .noc = tt_metal::NOC::RISCV_0_default, + .defines = kernel_defines} + ); + } + EnqueueProgram(device->command_queue(), sync_program, false); std::filesystem::path output_dir = std::filesystem::path(string(PROFILER_RUNTIME_ROOT_DIR) + string(PROFILER_LOGS_DIR_NAME)); std::filesystem::path log_path = output_dir / "sync_device_info.csv"; @@ -287,7 +288,7 @@ void InitDeviceProfiler(Device *device){ tt_metal::detail::WriteToBuffer(tt_metal_device_profiler_map.at(device_id).output_dram_buffer, inputs_DRAM); if (doSync) { - syncDeviceHost (device, SYNC_CORE, doHeader); + syncDeviceHost (device, SYNC_CORE, tt_metal_device_profiler_map.at(device_id).sync_program, doHeader); } } #endif @@ -406,7 +407,7 @@ void DumpDeviceProfileResults(Device *device, std::vector &worker_cor { if (!lastDump) { - syncDeviceHost (device, SYNC_CORE, false); + syncDeviceHost (device, SYNC_CORE, tt_metal_device_profiler_map.at(device_id).sync_program, false); } tt_metal_device_profiler_map.at(device_id).setDeviceArchitecture(device->arch()); tt_metal_device_profiler_map.at(device_id).dumpResults(device, worker_cores);