From 7e8c9454c290b4ecc7d6c4c1b420b3396bf24e6d Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 31 Jan 2025 14:41:22 +0400 Subject: [PATCH] [Snippets][CPU] Optimized reg spills in BrgemmCopyBKernel (#28635) ### Details: - *BrgemmCopyBKernel is a JIT-compiled kernel which emits a blocking loop over the `N` dimension with a binary call of the kernel `dnnl::x64::brgemm_copy_b_kernel` inside. Currently, we spill all registers before each call of this kernel. It means that if the dimension `N` is quite big, there will be many binary calls and reg spills. These regular reg spills may lead to perf degradation for Subgraphs with small `M`, `K`, and big `N` dimensions. This PR uses the binary call optimization feature: it spills only live registers* ### Tickets: - *N/A* --- .../snippets/x64/kernel_executors/brgemm_copy_b.cpp | 12 +++++++++++- .../snippets/x64/kernel_executors/brgemm_copy_b.hpp | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp index 54d4ffaa433944..0cb074da6d426f 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.cpp @@ -253,7 +253,7 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, size_t offset_out, size_t offset_comp) { EmitABIRegSpills spill(this); - spill.preamble(); + spill.preamble(get_live_regs()); const auto add_offset = [&](Xbyak::Reg64 reg, size_t bytes_offset) { if (bytes_offset) { @@ -298,6 +298,16 @@ void BrgemmCopyBKernel::emit_brgemm_copy_b_kernel_call(size_t N, spill.postamble(); } +std::set BrgemmCopyBKernel::get_live_regs() const { + // Only the registers `src_reg`, `tr_src_reg` and `comp_reg` should be + // saved on each `jit_brgemm_matmul_copy_b_t` binary call. 
+ // They're ABI parameter registers (caller saved). So we have to manually + // spill only them on each `jit_brgemm_matmul_copy_b_t` binary call + return {{snippets::RegType::gpr, static_cast(src_reg.getIdx())}, + {snippets::RegType::gpr, static_cast(tr_src_reg.getIdx())}, + {snippets::RegType::gpr, static_cast(comp_reg.getIdx())}}; +} + void BrgemmCopyBKernel::execute(matmul::jit_brgemm_matmul_copy_b_t* kernel, const void* src, const void* dst, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp index 5ef740067f2035..594702fed4ae3c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_copy_b.hpp @@ -170,6 +170,8 @@ struct BrgemmCopyBKernel : public RepackedInputKernel, public dnnl::impl::cpu::x void init_brgemm_copy_b_kernel(std::unique_ptr& kernel, const BrgemmCopyBKernelConfig& conf) const; + std::set get_live_regs() const; + static constexpr auto abi_param_regs = dnnl::impl::cpu::x64::abi_param_regs; const Xbyak::Reg64 src_reg = abi_param2; const Xbyak::Reg64 tr_src_reg = abi_param3;