Skip to content

Commit

Permalink
Implemented CPU plugin just-in-time emitter for NotEqual operation. (#…
Browse files Browse the repository at this point in the history
…28257)

### Details:
# Added NotEqual Operation JIT Emitter for ARM64 SIMD

Implemented an fp32 NotEqual JIT emitter for the ARM64 SIMD platform,
using NEON instructions for optimized element-wise comparison
operations. Added the new `jit_not_equal_emitter` class and integrated it
with the ARM64 executor and kernel (modified `create_eltwise_emitter` and
`get_supported_precisions`).

Included comprehensive unit tests and performance benchmarks. Verified
against reference CPU implementation with proper handling of edge cases
(NaN, infinity, denormals).

No breaking changes. Requires ARM64 platform with NEON SIMD support.

### Tickets:
 - *#27516*
  • Loading branch information
geeky33 authored Jan 30, 2025
1 parent aacad53 commit aa274e4
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,67 @@ void jit_equal_emitter::register_table_entries() {
push_arg_entry_of("one", 0x3f800000, true);
}

/// NOTEQUAL ///
/// Builds a NotEqual emitter for the given graph node.
///
/// @param host      JIT generator the emitter writes instructions into.
/// @param host_isa  Target instruction set.
/// @param node      Node whose execution precision is resolved via
///                  get_arithmetic_binary_exec_precision() and forwarded to the base emitter.
jit_not_equal_emitter::jit_not_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                                             dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                                             const std::shared_ptr<ov::Node>& node)
    : jit_emitter(host, host_isa, get_arithmetic_binary_exec_precision(node)) {
    // Set up the constant table at construction time.
    // NOTE(review): presumably this is what triggers register_table_entries() — confirm in jit_emitter.
    prepare_table();
}

/// Builds a NotEqual emitter with an explicitly chosen execution precision.
///
/// @param host      JIT generator the emitter writes instructions into.
/// @param host_isa  Target instruction set.
/// @param exec_prc  Execution precision forwarded unchanged to the base emitter.
jit_not_equal_emitter::jit_not_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                                             dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                                             const ov::element::Type exec_prc)
    : jit_emitter(host, host_isa, exec_prc) {
    // Set up the constant table at construction time.
    // NOTE(review): presumably this is what triggers register_table_entries() — confirm in jit_emitter.
    prepare_table();
}

/// @return Number of input vector registers the emitter consumes.
///         NotEqual compares two operands, so this is always 2.
size_t jit_not_equal_emitter::get_inputs_count() const {
    constexpr size_t binary_operand_count = 2;
    return binary_operand_count;
}

/// @return Number of auxiliary vector registers required.
///         emit_isa() uses exactly one (aux_vec_idxs[0]) to hold the "one" table constant.
size_t jit_not_equal_emitter::get_aux_vecs_count() const {
    constexpr size_t scratch_vec_count = 1;
    return scratch_vec_count;
}

/// @return Number of auxiliary general-purpose registers required (always 1).
// NOTE(review): emit_isa() does not reference a GPR directly; presumably the register is
// needed for addressing the constant table — confirm against jit_emitter.
size_t jit_not_equal_emitter::get_aux_gprs_count() const {
    constexpr size_t scratch_gpr_count = 1;
    return scratch_gpr_count;
}

/// Lists the input precision combinations this emitter accepts.
///
/// @param node  Unused; the answer does not depend on the node.
/// @return A set holding the single supported combination: (f32, f32).
std::set<std::vector<element::Type>> jit_not_equal_emitter::get_supported_precisions(
    const std::shared_ptr<ov::Node>& node) {
    // One precision per input; both operands must be fp32.
    const std::vector<element::Type> fp32_pair{element::f32, element::f32};
    return {fp32_pair};
}

void jit_not_equal_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
const std::vector<size_t>& out_vec_idxs) const {
if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) {
emit_isa<dnnl::impl::cpu::aarch64::asimd>(in_vec_idxs, out_vec_idxs);
} else {
OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
}
}
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
void jit_not_equal_emitter::emit_isa(const std::vector<size_t>& in_vec_idxs,
const std::vector<size_t>& out_vec_idxs) const {
OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());

using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
const TReg src1 = TReg(in_vec_idxs[0]);
const TReg src2 = TReg(in_vec_idxs[1]);
const TReg dst = TReg(out_vec_idxs[0]);
const TReg aux = TReg(aux_vec_idxs[0]);

h->fcmeq(dst.s, src1.s, src2.s);

h->not_(dst.b16, dst.b16);
h->ld1r(aux.s, table_val2("one"));
h->and_(dst.b16, dst.b16, aux.b16);
}
/// Registers the constants emit_isa() reads from the table: a single entry keyed "one".
void jit_not_equal_emitter::register_table_entries() {
    // 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0f.
    constexpr int fp32_one_bits = 0x3f800000;
    push_arg_entry_of("one", fp32_one_bits, true);
}

/// ELU ///
jit_elu_emitter::jit_elu_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,34 @@ class jit_equal_emitter : public jit_emitter {
void register_table_entries() override;
};

// JIT emitter for the element-wise NotEqual operation.
// Derives from jit_emitter and overrides its register-count, code-emission and
// table-registration hooks.
class jit_not_equal_emitter : public jit_emitter {
public:
// Constructs the emitter with an explicit execution precision (f32 when omitted).
jit_not_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
const ov::element::Type exec_prc = ov::element::f32);

// Constructs the emitter from a graph node `n`.
jit_not_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
const std::shared_ptr<ov::Node>& n);

// Number of input vector registers the emitter consumes.
size_t get_inputs_count() const override;

// Number of auxiliary (scratch) vector registers the emitter needs.
size_t get_aux_vecs_count() const override;

// Number of auxiliary (scratch) general-purpose registers the emitter needs.
size_t get_aux_gprs_count() const override;

// Returns the set of supported input precision combinations.
// `node` is optional and defaults to nullptr.
static std::set<std::vector<element::Type>> get_supported_precisions(
const std::shared_ptr<ov::Node>& node = nullptr);

private:
// Entry point for code generation; takes input and output vector register indices.
void emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const override;

// ISA-parameterized code generation, templated on the target instruction set.
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const;

// Registers the constant-table entries used by the emitted code.
void register_table_entries() override;
};

class jit_exp_emitter : public jit_emitter {
public:
jit_exp_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ bool JitEltwiseExecutor::isSupported(const Algorithm& algorithm,
Algorithm::EltwiseMod,
Algorithm::EltwiseMultiply,
Algorithm::EltwiseMulAdd,
Algorithm::EltwiseNotEqual,
Algorithm::EltwisePowerStatic,
Algorithm::EltwisePrelu,
Algorithm::EltwiseRelu,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
OV_CASE(Algorithm::EltwiseGreater, ov::intel_cpu::aarch64::jit_greater_emitter),
OV_CASE(Algorithm::EltwiseGreaterEqual, ov::intel_cpu::aarch64::jit_greater_equal_emitter),
OV_CASE(Algorithm::EltwiseMulAdd, ov::intel_cpu::aarch64::jit_mul_add_emitter),
OV_CASE(Algorithm::EltwiseNotEqual, ov::intel_cpu::aarch64::jit_not_equal_emitter),
OV_CASE(Algorithm::EltwiseMod, ov::intel_cpu::aarch64::jit_mod_emitter),
OV_CASE(Algorithm::EltwiseMultiply, ov::intel_cpu::aarch64::jit_multiply_emitter),
OV_CASE(Algorithm::EltwisePowerStatic, ov::intel_cpu::aarch64::jit_power_static_emitter),
Expand Down Expand Up @@ -890,6 +891,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
OV_CASE(Algorithm::EltwiseMish, jit_mish_emitter),
OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter),
OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter),
OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter),
OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter),
OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter),
OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter),
Expand Down

0 comments on commit aa274e4

Please sign in to comment.