Skip to content

Commit

Permalink
Apply CPU plugin patch to fix Rope in ContinuousBatching mode
Browse files Browse the repository at this point in the history
  • Loading branch information
itikhono committed Jan 23, 2025
1 parent c1f299c commit 7a16a50
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/nodes/rope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,16 @@ struct RoPE::RoPEExecutorQwen : public RoPE::Executor {
ov::intel_cpu::PlainTensor t_cos(inputs[1]); // [1, present-kv-length, 1, rotary_dims]
ov::intel_cpu::PlainTensor t_sin(inputs[2]); // [1, present-kv-length, 1, rotary_dims]
ov::intel_cpu::PlainTensor t_dst(outputs[0]); // [batch, length, head_cnt, head_size]>
ov::intel_cpu::PlainTensor gather;

auto rotary_dims = t_cos.size(3);

if (m_config.slice_stop - m_config.slice_start > 0) {
t_src = t_src.slice(2, m_config.slice_start, m_config.slice_stop);
}
if (m_config.gather_position_arg_id > 0) {
gather.reset(inputs[m_config.gather_position_arg_id]);
}

auto batch_size = t_src.size(0);
auto seq_len = t_src.size(1);
Expand All @@ -347,9 +352,19 @@ struct RoPE::RoPEExecutorQwen : public RoPE::Executor {
auto present_kv_len = t_cos.size(1);

parallel_for3d(batch_size, seq_len, head_cnt, [&](size_t b, size_t p, size_t h) {
size_t sincos_pos;
if (gather) {
if (gather.m_rank == 4)
sincos_pos = gather.at<int32_t>({b, h, p, 0}, true);
else
sincos_pos = gather.at<int32_t>({b, p}, true);
} else {
sincos_pos = present_kv_len - seq_len + p;
}

auto* src = t_src.ptr<T>(b, p, h * head_size);
auto* cos = &t_cos.at<float>({b, present_kv_len - seq_len + p, h, 0}, true);
auto* sin = &t_sin.at<float>({b, present_kv_len - seq_len + p, h, 0}, true);
auto* cos = &t_cos.at<float>({b, sincos_pos, h, 0}, true);
auto* sin = &t_sin.at<float>({b, sincos_pos, h, 0}, true);
auto* dst = t_dst.ptr<T>(b, p, h);

if (m_rotaryKernel) {
Expand Down

0 comments on commit 7a16a50

Please sign in to comment.