Skip to content

Commit

Permalink
[Snippets] Add debug caps for dumping snippets parameters (#28378)
Browse files Browse the repository at this point in the history
### Details:
- Add infrastructure to dump snippets parameters using the
`OV_SNIPPETS_DUMP_BRGEMM_PARAMS` environment variable
 - Enable caps for brgemm snippet

### Tickets:
 - 137302
  • Loading branch information
aobolensk authored Feb 4, 2025
1 parent e4dbd33 commit c27f796
Show file tree
Hide file tree
Showing 12 changed files with 478 additions and 52 deletions.
20 changes: 20 additions & 0 deletions src/common/snippets/docs/debug_capabilities/parameters_dump.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Snippet parameters dump

The pass dumps selected properties of some performance-critical operations in Subgraphs. Only MatMuls are currently supported by this pass.

To turn on the snippet parameters dump feature, set the following environment variable:
```sh
OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=<path_to_csv_dump_file>" binary ...
```

Examples:
```sh
OV_SNIPPETS_DUMP_BRGEMM_PARAMS="path=brgemm.csv" binary ...
```

Output example:

| subgraph_name | name | in_type | out_type | in_shapes | out_shapes | in_layouts | out_layouts | M | N | K | m_block | n_block | k_block | acc_max_time | avg_max_time |
|--------------------|------------|-------------|----------|-------------------------------------|----------------------|--------------------------|-------------|-----|-----|-----|---------|----------|----------|---------------|---------------|
| FakeQuantize_457 | MatMul_438 | i8;i8;f32 | i32 | 1 16 128 64;1 16 64 128;1 16 64 128 | 1 16 128 128 | 0 2 1 3;0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 128 | 64 | 32 | FULL_DIM | FULL_DIM | 41482 | 5185 |
| FakeQuantize_457 | MatMul_452 | u8;i8 | i32 | 1 16 128 128;1 16 128 64 | 1 16 128 64 | 0 1 2 3;0 1 2 3; | 0 1 2 3; | 128 | 64 | 128 | 32 | FULL_DIM | FULL_DIM | 39427 | 4928 |
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifdef SNIPPETS_DEBUG_CAPS

#pragma once

#include "snippets/itt.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/lowered/specific_loop_iter_handlers.hpp"
#include "snippets/lowered/pass/iter_handler.hpp"
#include "snippets/op/brgemm.hpp"
#include "snippets/utils/utils.hpp"

namespace ov {
namespace snippets {
namespace lowered {
namespace pass {

/**
 * @interface InsertPerfCountVerbose
 * @brief Brgemm parameters dump pass: for every Brgemm expression in the processed range it collects
 *        the Brgemm parameters as a CSV fragment and surrounds the expression with
 *        PerfCountBegin / PerfCountEnd nodes that carry the dumpers and the collected parameters.
 * @ingroup snippets
 */
class InsertPerfCountVerbose : public snippets::lowered::pass::RangedPass {
public:
    /**
     * @param subgraph_name name of the Subgraph being processed; emitted as the first CSV field
     */
    // `explicit` prevents an accidental implicit std::string -> pass conversion.
    explicit InsertPerfCountVerbose(const std::string& subgraph_name) : m_subgraph_name(subgraph_name) {}
    OPENVINO_RTTI("InsertPerfCountVerbose", "", RangedPass);

    /**
     * @brief Runs the pass on the expressions in [begin, end) of the linear IR.
     * @return true if at least one perf-count pair was inserted
     */
    bool run(snippets::lowered::LinearIR& linear_ir,
             snippets::lowered::LinearIR::constExprIt begin,
             snippets::lowered::LinearIR::constExprIt end) override;

private:
    /**
     * @brief Serializes the parameters of the given Brgemm expression into a comma-separated string.
     */
    std::string collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr,
                               const snippets::lowered::LinearIR& linear_ir);

    // Name of the owning Subgraph, set once in the constructor.
    std::string m_subgraph_name;
};

} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov

#endif // SNIPPETS_DEBUG_CAPS
81 changes: 75 additions & 6 deletions src/common/snippets/include/snippets/op/perf_count.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,65 @@

namespace ov {
namespace snippets {

namespace op {
class PerfCountEnd;
} // namespace op

namespace utils {

/**
* @interface Dumper
* @brief Dumper for node debug properties.
*        Abstract base class: concrete dumpers implement update() to report a PerfCountEnd node.
* @ingroup snippets
*/
class Dumper {
public:
Dumper() = default;
virtual ~Dumper() = default;

// Initializes the dumper from a parameter string.
// NOTE(review): presumably fills m_params / m_debug_params_map; the definition is not visible here — confirm.
void init(const std::string &params);
// Pure virtual hook receiving the PerfCountEnd node whose data should be dumped.
virtual void update(const op::PerfCountEnd* node) = 0;
protected:
// String-to-string map of debug parameters available to derived dumpers.
std::map<std::string, std::string> m_debug_params_map;
// Parameter string available to derived dumpers (presumably the one passed to init() — TODO confirm).
std::string m_params;
};

/**
* @interface ConsoleDumper
* @brief Dumper for node debug properties (output: stdout)
* @ingroup snippets
*/
class ConsoleDumper : public Dumper {
public:
ConsoleDumper() = default;
// User-provided destructor; its definition is not visible in this header.
~ConsoleDumper() override;

// Dumper interface implementation for the given PerfCountEnd node.
void update(const op::PerfCountEnd* node) override;

private:
// Per-thread storage (ov::threading::ThreadLocal).
// NOTE(review): presumably mirrors PerfCountEnd's accumulation / iteration values — confirm in the .cpp.
ov::threading::ThreadLocal<uint64_t> m_accumulation;
ov::threading::ThreadLocal<uint32_t> m_iteration;
};

/**
 * @interface CSVDumper
 * @brief Dumper for node debug properties (output: .csv file)
 * @ingroup snippets
 */
class CSVDumper : public Dumper {
public:
    /**
     * @param csv_path path of the .csv file used by this dumper
     */
    // `explicit` avoids implicit std::string -> CSVDumper conversions. The top-level `const` on a
    // by-value parameter is dropped because it has no effect in a declaration (the function type,
    // and therefore the out-of-line definition, is unchanged).
    explicit CSVDumper(std::string csv_path);
    ~CSVDumper() override;

    // Dumper interface implementation for the given PerfCountEnd node.
    void update(const op::PerfCountEnd* node) override;

private:
    // Destination file path; immutable after construction.
    const std::string csv_path;
};

} // namespace utils

namespace op {

/**
Expand Down Expand Up @@ -73,20 +132,30 @@ class PerfCountBegin : public PerfCountBeginBase {
// Perf-count end operation: holds per-thread accumulation / iteration storage, a list of dumpers
// and a pointer to the paired PerfCountBegin.
// NOTE(review): this listing appears to show removed and added diff lines together (two constructors
// taking pc_begin, two default constructors, two destructors) — confirm against the real header.
class PerfCountEnd : public PerfCountEndBase {
public:
OPENVINO_OP("PerfCountEnd", "SnippetsOpset", PerfCountEndBase);
PerfCountEnd(const Output<Node>& pc_begin);
PerfCountEnd() = default;
~PerfCountEnd() {
output_perf_count();
}
void output_perf_count();
// Constructor taking the PerfCountBegin output plus optional dumpers and a parameter string.
PerfCountEnd(const Output<Node>& pc_begin,
std::vector<std::shared_ptr<utils::Dumper>> dumpers = {},
const std::string& params = "");
PerfCountEnd();
~PerfCountEnd();

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

void init_pc_begin();
void set_accumulated_time();

// Read-only access to the thread-local `accumulation` member.
const ov::threading::ThreadLocal<uint64_t> &get_accumulation() const {
return accumulation;
}

// Read-only access to the thread-local `iteration` member.
const ov::threading::ThreadLocal<uint32_t> &get_iteration() const {
return iteration;
}

private:
// Per-thread storage exposed through get_accumulation() / get_iteration().
ov::threading::ThreadLocal<uint64_t> accumulation;
ov::threading::ThreadLocal<uint32_t> iteration;

// Dumpers attached to this node (shared ownership).
std::vector<std::shared_ptr<utils::Dumper>> dumpers;
// Paired PerfCountBegin node; null until assigned.
std::shared_ptr<PerfCountBegin> m_pc_begin = nullptr;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ class DebugCapsConfig {
}
} dumpLIR;

struct : PropertyGroup {
std::string csv_path;
std::vector<PropertySetterPtr> getPropertySetters() override {
return {
PropertySetterPtr(new StringPropertySetter("path", csv_path, "path to dumped brgemm params")),
};
}
} dumpParams;

// Snippets performance count mode
// Disabled - default, w/o perf count for snippets
// Chrono - perf count with chrono call. This is a universal method, and support multi-thread case to output perf
Expand Down
9 changes: 9 additions & 0 deletions src/common/snippets/include/snippets/utils/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,15 @@ void visit_path(const lowered::ExpressionPtr& expr,
std::function<void(lowered::ExpressionPtr)> func,
bool visit_parent_path);

/**
* @brief Converts a tensor to a string representation.
* Each value in the tensor is converted to a string. If the value is a full dimension, it is represented as
* "FULL_DIM". If the value is dynamic, it is represented as "?".
* @param tensor The tensor to be converted to a string.
* @param delimiter The separator placed between consecutive values (", " by default).
* @return A string representation of the tensor.
*/
std::string tensor2str(const VectorDims& tensor, const std::string& delimiter = ", ");

} // namespace utils
} // namespace snippets
} // namespace ov
16 changes: 2 additions & 14 deletions src/common/snippets/src/lowered/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,18 +170,6 @@ ExpressionPtr Expression::clone() const {
}

bool Expression::visit_attributes(AttributeVisitor &visitor) {
auto subtensor2str = [](const VectorDims& subtensor) {
std::stringstream ss;
for (size_t i = 0; i < subtensor.size(); ++i) {
const auto& v = subtensor[i];
const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" :
utils::is_dynamic_value(v) ? "?" : std::to_string(v);
const auto del = i < subtensor.size() - 1 ? ", " : "";
ss << v_str << del;
}
return ss.str();
};

std::ostringstream in_regs, out_regs;
std::vector<std::pair<std::string, ov::PartialShape>> shapes;
std::vector<std::pair<std::string, std::string>> subtensors;
Expand All @@ -194,7 +182,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) {

const auto& subtensor = desc->get_subtensor();
if (!subtensor.empty())
subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor));
subtensors.emplace_back("in_subtensor_" + std::to_string(i), utils::tensor2str(subtensor));

const auto& layout = desc->get_layout();
if (!layout.empty() && !utils::is_planar_layout(layout))
Expand All @@ -210,7 +198,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) {

const auto& subtensor = desc->get_subtensor();
if (!subtensor.empty())
subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor));
subtensors.emplace_back("out_subtensor_" + std::to_string(i), utils::tensor2str(subtensor));

const auto& layout = desc->get_layout();
if (!layout.empty() && !utils::is_planar_layout(layout))
Expand Down
157 changes: 157 additions & 0 deletions src/common/snippets/src/lowered/pass/insert_perf_count_verbose.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifdef SNIPPETS_DEBUG_CAPS
#include "snippets/lowered/pass/insert_perf_count_verbose.hpp"

#include "snippets/itt.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/lowered/pass/pass.hpp"
#include "snippets/lowered/pass/propagate_subtensors.hpp"
#include "snippets/lowered/pass/iter_handler.hpp"
#include "snippets/snippets_isa.hpp"
#include "snippets/utils/utils.hpp"

namespace ov {
namespace snippets {
namespace lowered {
namespace pass {

/**
 * Surrounds every Brgemm expression in [begin, end) with a PerfCountBegin / PerfCountEnd pair.
 * The PerfCountEnd node receives the dumpers (console always, CSV when a path is configured)
 * and the parameter string collected for that Brgemm.
 * Returns true if at least one pair was inserted.
 */
bool InsertPerfCountVerbose::run(snippets::lowered::LinearIR& linear_ir,
                                 snippets::lowered::LinearIR::constExprIt begin,
                                 snippets::lowered::LinearIR::constExprIt end) {
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::InsertPerfCountVerbose")

    // Function-local static counter: keeps the generated friendly names distinct across calls.
    static size_t seq_number = 0;

    const auto csv_path = linear_ir.get_config().debug_config.dumpParams.csv_path;

    // The console dumper is always attached; the CSV dumper only when a path is configured.
    std::vector<std::shared_ptr<snippets::utils::Dumper>> dumpers;
    dumpers.push_back(std::make_shared<snippets::utils::ConsoleDumper>());
    if (!csv_path.empty()) {
        dumpers.push_back(std::make_shared<snippets::utils::CSVDumper>(csv_path));
    }

    bool modified = false;
    for (auto expr_it = begin; expr_it != end; ++expr_it) {
        const auto& brgemm_expr = *expr_it;
        if (!ov::as_type_ptr<ov::snippets::op::Brgemm>(brgemm_expr->get_node())) {
            continue;
        }

        // Parameters are collected before the IR is touched.
        const auto params = collect_params(brgemm_expr, linear_ir);
        const auto suffix = std::to_string(seq_number);
        const auto no_inputs = std::vector<PortConnectorPtr>{};

        // PerfCountBegin goes right before the Brgemm expression...
        const auto pc_begin = std::make_shared<snippets::op::PerfCountBegin>();
        pc_begin->set_friendly_name(std::string("PerfCountVerbose_Begin_") + suffix);
        linear_ir.insert_node(pc_begin, no_inputs, brgemm_expr->get_loop_ids(), false, expr_it);

        // ...and PerfCountEnd right after it.
        const auto pc_end = std::make_shared<snippets::op::PerfCountEnd>(pc_begin->output(0), dumpers, params);
        pc_end->set_friendly_name(std::string("PerfCountVerbose_End_") + suffix);
        linear_ir.insert_node(pc_end, no_inputs, brgemm_expr->get_loop_ids(), false, next(expr_it));

        ++seq_number;
        modified = true;
    }
    return modified;
}

/**
 * @brief Serializes the parameters of a Brgemm expression into one comma-separated fragment.
 *
 * Fields, in order: subgraph name, node friendly name, input element types, output element types,
 * input shapes, output shapes, input layouts, output layouts, M, N, K, m_block, n_block, k_block.
 * Several values inside one field are separated by ';'. Every field, including the last one,
 * is terminated by ','.
 *
 * @param brgemm_expr expression whose node must be a snippets::op::Brgemm
 * @param linear_ir   linear IR the expression belongs to (not used by the current implementation)
 * @return the serialized parameters
 */
std::string InsertPerfCountVerbose::collect_params(const ov::snippets::lowered::ExpressionPtr& brgemm_expr,
                                                   const snippets::lowered::LinearIR& linear_ir) {
    const auto brgemm = ov::as_type_ptr<ov::snippets::op::Brgemm>(brgemm_expr->get_node());
    OPENVINO_ASSERT(brgemm, "Brgemm is nullptr!");

    const size_t in_count = brgemm->get_input_size();
    const size_t out_count = brgemm->get_output_size();

    std::stringstream ss;
    ss << m_subgraph_name << ',';
    ss << brgemm_expr->get_node()->get_friendly_name() << ',';

    // Element types and shapes: ';' only between entries (no trailing separator).
    for (size_t i = 0; i < in_count; ++i) {
        if (i != 0)
            ss << ';';
        ss << brgemm->get_input_element_type(i);
    }
    ss << ',';
    for (size_t i = 0; i < out_count; ++i) {
        if (i != 0)
            ss << ';';
        ss << brgemm->get_output_element_type(i);
    }
    ss << ',';
    for (size_t i = 0; i < in_count; ++i) {
        if (i != 0)
            ss << ';';
        const auto& port_desc = brgemm_expr->get_input_port_descriptor(i);
        const auto& shape = ov::snippets::utils::get_planar_vdims(port_desc->get_shape(), port_desc->get_layout());
        ss << utils::tensor2str(shape, " ");
    }
    ss << ',';
    for (size_t i = 0; i < out_count; ++i) {
        if (i != 0)
            ss << ';';
        const auto& port_desc = brgemm_expr->get_output_port_descriptor(i);
        const auto& shape = ov::snippets::utils::get_preordered_vdims(port_desc->get_shape(), port_desc->get_layout());
        ss << utils::tensor2str(shape, " ");
    }
    ss << ',';

    // Layouts: every entry is followed by ';' (including the last one) — kept as is so that the
    // existing dump format does not change.
    for (size_t i = 0; i < in_count; ++i) {
        const auto& port_desc = brgemm_expr->get_input_port_descriptor(i);
        ss << utils::tensor2str(port_desc->get_layout(), " ");
        ss << ';';
    }
    ss << ',';
    for (size_t i = 0; i < out_count; ++i) {
        const auto& port_desc = brgemm_expr->get_output_port_descriptor(i);
        ss << utils::tensor2str(port_desc->get_layout(), " ");
        ss << ';';
    }
    ss << ',';

    const auto& in_0_desc = brgemm_expr->get_input_port_descriptor(0);
    const auto& in_1_desc = brgemm_expr->get_input_port_descriptor(1);
    const auto& out_desc = brgemm_expr->get_output_port_descriptor(0);

    const auto& in_0_planar_dims =
        ov::snippets::utils::get_planar_vdims(in_0_desc->get_shape(), in_0_desc->get_layout());
    const auto& in_1_planar_dims =
        ov::snippets::utils::get_planar_vdims(in_1_desc->get_shape(), in_1_desc->get_layout());
    const auto& out_preordered_dims =
        ov::snippets::utils::get_preordered_vdims(out_desc->get_shape(), out_desc->get_layout());

    // The two innermost dimensions are read below, so every shape needs rank >= 2
    // (in_0's innermost dim is read as K, so rank 1 would not be UB, but it is not a valid Brgemm input).
    OPENVINO_ASSERT(in_0_planar_dims.size() >= 2 && in_1_planar_dims.size() >= 2 && out_preordered_dims.size() >= 2,
                    "Brgemm shapes must have at least two dimensions.");

    // Output is [.., M, N]; input 0 is [.., M, K]; input 1 is [.., K, N].
    const auto m = *++out_preordered_dims.rbegin();
    const auto n = *out_preordered_dims.rbegin();
    const auto k0 = *in_0_planar_dims.rbegin();
    const auto k1 = *++in_1_planar_dims.rbegin();
    size_t k = 0;
    OPENVINO_ASSERT(utils::merge_dynamic_dim(k, k0, k1),
                    "Brgemm input descriptors have incompatible K dimension value.");
    ss << static_cast<int64_t>(m) << ',' << static_cast<int64_t>(n) << ',' << static_cast<int64_t>(k) << ',';

    const auto& in_0_subtensor = in_0_desc->get_subtensor();
    const auto& in_1_subtensor = in_1_desc->get_subtensor();
    OPENVINO_ASSERT(!in_0_subtensor.empty() && !in_1_subtensor.empty(),
                    "Brgemm input subtensors must be set to dump block sizes.");

    // Subtensors follow the innermost dims: input 0 -> [m_block, k_block], input 1 -> [k_block, n_block].
    // k_block therefore comes from input 0; the last element of the output subtensor is the N block.
    const size_t m_block = in_0_subtensor.front();
    const size_t n_block = in_1_subtensor.back();
    const size_t k_block = in_0_subtensor.back();

    // Writes one block size followed by ','; special values are printed symbolically.
    auto append_block_info = [&](size_t block) {
        if (block == utils::get_full_dim_value()) {
            ss << "FULL_DIM";
        } else if (block == utils::get_dynamic_value<size_t>()) {
            ss << "?";
        } else {
            ss << block;
        }
        ss << ',';
    };

    append_block_info(m_block);
    append_block_info(n_block);
    append_block_info(k_block);
    return ss.str();
}

} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov

#endif // SNIPPETS_DEBUG_CAPS
Loading

0 comments on commit c27f796

Please sign in to comment.