Skip to content

Commit

Permalink
#9174: Cache profiler sync program to enable use with trace
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-aho authored and mo-tenstorrent committed Jun 7, 2024
1 parent 6cbbb6e commit 46c7f2f
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 20 deletions.
2 changes: 2 additions & 0 deletions tt_metal/tools/profiler/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <filesystem>

#include "tt_metal/impl/buffers/buffer.hpp"
#include "tt_metal/impl/program/program.hpp"
#include "llrt/llrt.hpp"
#include "tools/profiler/profiler_state.hpp"
#include "tools/profiler/common.hpp"
Expand Down Expand Up @@ -101,6 +102,7 @@ class DeviceProfiler {

//DRAM buffer for device side results
std::shared_ptr<tt::tt_metal::Buffer> output_dram_buffer = nullptr;
std::shared_ptr<tt::tt_metal::Program> sync_program = nullptr;

// Device-core Syncdata
std::map<CoreCoord, std::tuple<double,double,double>> device_core_sync_info;
Expand Down
42 changes: 22 additions & 20 deletions tt_metal/tools/profiler/tt_metal_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ std::unordered_map <uint32_t, uint64_t> smallestHostime;

constexpr CoreCoord SYNC_CORE = {0,0};

void syncDeviceHost(Device *device, CoreCoord logical_core, bool doHeader)
void syncDeviceHost(Device *device, CoreCoord logical_core, std::shared_ptr<tt_metal::Program> &sync_program, bool doHeader)
{
if (!tt::llrt::OptionsG.get_profiler_sync_enabled()) return;
ZoneScopedC(tracy::Color::Tomato3);
Expand All @@ -53,23 +53,24 @@ void syncDeviceHost(Device *device, CoreCoord logical_core, bool doHeader)
deviceHostTimePair.emplace(device_id, (std::vector <std::pair<uint64_t,uint64_t>>){});
smallestHostime.emplace(device_id, 0);

tt_metal::Program program = tt_metal::CreateProgram();

constexpr uint16_t sampleCount = 249;
std::map<string, string> kernel_defines = {
{"SAMPLE_COUNT", std::to_string(sampleCount)},
};

tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel(
program, "tt_metal/tools/profiler/sync/sync_kernel.cpp",
logical_core,
tt_metal::DataMovementConfig{
.processor = tt_metal::DataMovementProcessor::RISCV_0,
.noc = tt_metal::NOC::RISCV_0_default,
.defines = kernel_defines}
);

EnqueueProgram(device->command_queue(), program, false);
if (sync_program == nullptr) {
sync_program = std::make_shared<tt_metal::Program>();

std::map<string, string> kernel_defines = {
{"SAMPLE_COUNT", std::to_string(sampleCount)},
};

tt_metal::KernelHandle brisc_kernel = tt_metal::CreateKernel(
*sync_program, "tt_metal/tools/profiler/sync/sync_kernel.cpp",
logical_core,
tt_metal::DataMovementConfig{
.processor = tt_metal::DataMovementProcessor::RISCV_0,
.noc = tt_metal::NOC::RISCV_0_default,
.defines = kernel_defines}
);
}
EnqueueProgram(device->command_queue(), sync_program, false);

std::filesystem::path output_dir = std::filesystem::path(string(PROFILER_RUNTIME_ROOT_DIR) + string(PROFILER_LOGS_DIR_NAME));
std::filesystem::path log_path = output_dir / "sync_device_info.csv";
Expand Down Expand Up @@ -287,7 +288,7 @@ void InitDeviceProfiler(Device *device){
tt_metal::detail::WriteToBuffer(tt_metal_device_profiler_map.at(device_id).output_dram_buffer, inputs_DRAM);
if (doSync)
{
syncDeviceHost (device, SYNC_CORE, doHeader);
syncDeviceHost (device, SYNC_CORE, tt_metal_device_profiler_map.at(device_id).sync_program, doHeader);
}
}
#endif
Expand Down Expand Up @@ -406,15 +407,16 @@ void DumpDeviceProfileResults(Device *device, std::vector<CoreCoord> &worker_cor
{
if (!lastDump)
{
syncDeviceHost (device, SYNC_CORE, false);
syncDeviceHost (device, SYNC_CORE, tt_metal_device_profiler_map.at(device_id).sync_program, false);
}
tt_metal_device_profiler_map.at(device_id).setDeviceArchitecture(device->arch());
tt_metal_device_profiler_map.at(device_id).dumpResults(device, worker_cores);
if (lastDump)
{
// Process is ending, no more device dumps are coming, reset your ref on the buffer so deallocate is the last
// owner.
// owner. Sync program also contains a buffer so it is safter to release it here
tt_metal_device_profiler_map.at(device_id).output_dram_buffer.reset();
tt_metal_device_profiler_map.at(device_id).sync_program.reset();
}
else
{
Expand Down

0 comments on commit 46c7f2f

Please sign in to comment.