diff --git a/src/core/src/op/multiply.cpp b/src/core/src/op/multiply.cpp
index c1c47df1abd3ae..264d74b4042a15 100644
--- a/src/core/src/op/multiply.cpp
+++ b/src/core/src/op/multiply.cpp
@@ -48,7 +48,7 @@ bool Multiply::evaluate(TensorVector& outputs, const TensorVector& inputs) const
 
     using namespace ov::element;
     return IF_TYPE_OF(v1_Multiply_evaluate,
-                      OV_PP_ET_LIST(bf16, f16, f32, f64, i32, i64, u32, u64),
+                      OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i32, i64, u8, u32, u64),
                       multiply::Evaluate,
                       inputs[0].get_element_type(),
                       inputs[0],
diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
index c519e132f203ab..ae15bc21fd0dbb 100644
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -124,7 +124,9 @@ bool jitIsSupported(const Node* node,
             ov::element::f16,
             ov::element::f32,
             ov::element::i32,
-            ov::element::u32
+            ov::element::u32,
+            ov::element::i8,
+            ov::element::u8
         };
 
         if (!check_precisions(input_precisions, supported_precisions)) {
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
index 29127f7e36494f..53a79119b75fa3 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
@@ -274,6 +274,38 @@ void jit_uni_eltwise_generic::generate() {
     }
 }
 
+namespace utils {
+// Emits an Advanced SIMD load from `ptr_reg + offset` through generator `h`:
+//   broadcast == true  -> ld1r into `data_lane`
+//   broadcast == false -> ld1 into `data_lanes`
+// The register views supplied by the caller select what is loaded. A non-zero
+// `offset` is applied with add_imm into X_DEFAULT_ADDR (X_TMP_0 is passed as
+// its last operand); `ptr_reg` is never used as a destination.
+template <typename T1, typename T2>
+void load_vector(const T1& data_lane,
+                 const T2& data_lanes,
+                 const Xbyak_aarch64::XReg& ptr_reg,
+                 const int64_t offset,
+                 const bool broadcast,
+                 jit_generator* h) {
+    if (broadcast) {
+        if (offset == 0) {
+            h->ld1r(data_lane, Xbyak_aarch64::ptr(ptr_reg));
+        } else {
+            h->add_imm(h->X_DEFAULT_ADDR, ptr_reg, offset, h->X_TMP_0);
+            h->ld1r(data_lane, Xbyak_aarch64::ptr(h->X_DEFAULT_ADDR));
+        }
+    } else {
+        if (offset == 0) {
+            h->ld1(data_lanes, Xbyak_aarch64::ptr(ptr_reg));
+        } else {
+            h->add_imm(h->X_DEFAULT_ADDR, ptr_reg, offset, h->X_TMP_0);
+            h->ld1(data_lanes, Xbyak_aarch64::ptr(h->X_DEFAULT_ADDR));
+        }
+    }
+}
+}  // namespace utils
+
 template
 void jit_uni_eltwise_generic::load_vector(const TReg& data,
                                           const XReg& ptr_reg,
@@ -283,16 +315,7 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data,
                                           const int32_t ptr_offset) {
     switch (src_prc) {
         case ov::element::f16: {
-            if (broadcast) {
-                if (ptr_offset == 0) {
-                    ld1r(data.h, ptr(ptr_reg));
-                } else {
-                    add_imm(ptr_reg, ptr_reg, ptr_offset, X_DEFAULT_ADDR);
-                    ld1r(data.h, ptr(ptr_reg));
-                }
-            } else {
-                ldr(Xbyak_aarch64::DReg(data.getIdx()), Xbyak_aarch64::ptr(ptr_reg, ptr_offset));
-            }
+            utils::load_vector(data.h, data.h4, ptr_reg, ptr_offset, broadcast, this);
             break;
         }
         case ov::element::f32:
@@ -305,8 +328,15 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data,
             }
             break;
         }
+        case ov::element::i8:
+        case ov::element::u8: {
+            // Only four 8-bit elements are widened below, so load a single 32-bit
+            // lane; a full `data.s` load would read 16 bytes.
+            utils::load_vector(data.b, data.s[0], ptr_reg, ptr_offset, broadcast, this);
+            break;
+        }
         default: {
-            IE_THROW(Unexpected) << "src_prc " << src_prc << " is not supported";;
+            IE_THROW(Unexpected) << "src_prc " << src_prc << " is not supported";
         }
     }
 
@@ -322,10 +352,22 @@ void jit_uni_eltwise_generic::load_vector(const TReg& data,
                 scvtf(data.s, data.s);
                 break;
             }
+            case ov::element::i8: {
+                sshll(data.h8, data.b8, 0);
+                sshll(data.s4, data.h4, 0);
+                scvtf(data.s, data.s);
+                break;
+            }
             case ov::element::u32: {
                 ucvtf(data.s, data.s);
                 break;
             }
+            case ov::element::u8: {
+                ushll(data.h8, data.b8, 0);
+                ushll(data.s4, data.h4, 0);
+                ucvtf(data.s, data.s);
+                break;
+            }
             default:
                 IE_THROW(Unexpected) << "src_prc " << src_prc << " is not supported";;
         }
@@ -353,6 +395,24 @@ void jit_uni_eltwise_generic::load_scalar(const SReg& data,
             ldr(data, Xbyak_aarch64::ptr(ptr, ptr_offset));
             break;
         }
+        case ov::element::i8: {
+            ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
+
+            // scalar is loaded, operates with vector
+            TReg vec(data.getIdx());
+            sshll(vec.h8, vec.b8, 0);
+            sshll(vec.s4, vec.h4, 0);
+            break;
+        }
+        case ov::element::u8: {
+            ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
+
+            // scalar is loaded, operates with vector
+            TReg vec(data.getIdx());
+            ushll(vec.h8, vec.b8, 0);
+            ushll(vec.s4, vec.h4, 0);
+            break;
+        }
         default: {
             IE_THROW(Unexpected) << "dst_prc " << dst_prc << " is not supported";;
         }
@@ -366,11 +426,13 @@ void jit_uni_eltwise_generic::load_scalar(const SReg& data,
                 fcvt(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::HReg(data.getIdx()));
                 break;
             }
-            case ov::element::i32: {
+            case ov::element::i32:
+            case ov::element::i8: {
                 scvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx()));
                 break;
             }
-            case ov::element::u32: {
+            case ov::element::u32:
+            case ov::element::u8: {
                 ucvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx()));
                 break;
             }
@@ -406,6 +468,18 @@ void jit_uni_eltwise_generic::store_vector(const XReg& ptr,
                 fcvtnu(data.s, data.s);
                 break;
             }
+            case ov::element::i8: {
+                fcvtns(data.s, data.s);
+                xtn(data.h4, data.s4);
+                xtn(data.b8, data.h8);
+                break;
+            }
+            case ov::element::u8: {
+                fcvtnu(data.s, data.s);
+                xtn(data.h4, data.s4);
+                xtn(data.b8, data.h8);
+                break;
+            }
             default: {
                 IE_THROW(Unexpected) << "src_prc " << src_prc << " is not supported";;
             }
@@ -429,6 +503,11 @@ void jit_uni_eltwise_generic::store_vector(const XReg& ptr,
             str(Xbyak_aarch64::QReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
             break;
         }
+        case ov::element::i8:
+        case ov::element::u8: {
+            str(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
+            break;
+        }
         default: {
             IE_THROW(Unexpected) << "dst_prc " << dst_prc << " is not supported";;
         }
@@ -457,6 +536,20 @@ void jit_uni_eltwise_generic::store_scalar(const XReg& ptr,
                 fcvtnu(data, data);
                 break;
             }
+            case ov::element::i8: {
+                TReg vec_data(data.getIdx());
+                fcvtns(vec_data.s, vec_data.s);
+                xtn(vec_data.h4, vec_data.s4);
+                xtn(vec_data.b8, vec_data.h8);
+                break;
+            }
+            case ov::element::u8: {
+                TReg vec_data(data.getIdx());
+                fcvtnu(vec_data.s, vec_data.s);
+                xtn(vec_data.h4, vec_data.s4);
+                xtn(vec_data.b8, vec_data.h8);
+                break;
+            }
             default: {
                 IE_THROW(Unexpected) << "src_prc " << src_prc << " is not supported";;
             }
@@ -480,6 +573,11 @@ void jit_uni_eltwise_generic::store_scalar(const XReg& ptr,
             str(data, Xbyak_aarch64::ptr(ptr, ptr_offset));
             break;
         }
+        case ov::element::i8:
+        case ov::element::u8: {
+            str(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
+            break;
+        }
         default: {
             IE_THROW(Unexpected) << "dst_prc " << src_prc << " is not supported";;
         }
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp
index ab340f9ce35c4c..742dad560413c4 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp
@@ -66,10 +66,18 @@ ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type&
     } else {
         switch (type) {
         case ov::element::i8:
-            params = gen_params(INT8_MAX, INT8_MIN);
+            if (adopt_intervals) {
+                params = gen_params(11 * 2, -11);
+            } else {
+                params = gen_params(INT8_MAX, INT8_MIN);
+            }
             break;
         case ov::element::u8:
-            params = gen_params(UINT8_MAX, 0);
+            if (adopt_intervals) {
+                params = gen_params(15, 0);
+            } else {
+                params = gen_params(UINT8_MAX, 0);
+            }
             break;
         case ov::element::i16:
             params = gen_params(INT16_MAX, INT16_MIN);
@@ -109,7 +117,8 @@ void EltwiseLayerCPUTest::generate_inputs(const std::vector& targetIn
             inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input(
                 funcInput.get_element_type(),
                 targetInputStaticShapes[i],
-                (funcInput.get_element_type() == element::i32) || (funcInput.get_element_type() == element::u32))});
+                (funcInput.get_element_type() == element::i32) || (funcInput.get_element_type() == element::u32) ||
+                (funcInput.get_element_type() == element::i8) || (funcInput.get_element_type() == element::u8))});
         }
     }
 
@@ -199,7 +208,11 @@ void EltwiseLayerCPUTest::SetUp() {
                     }
                 }
 
-                auto data_tensor = generate_eltwise_input(netType, shape, (netType == element::i32) || (netType == element::u32));
+                auto data_tensor = generate_eltwise_input(
+                    netType,
+                    shape,
+                    (netType == element::i32) || (netType == element::u32) ||
+                    (netType == element::i8) || (netType == element::u8));
                 if ((netType == ElementType::i8) || (netType == ElementType::u8)) {
                     auto data_ptr = reinterpret_cast(data_tensor.data());
                     std::vector data(data_ptr, data_ptr + ov::shape_size(shape));
@@ -272,8 +285,11 @@ const std::vector& netType() {
 
 const std::vector& netTypeJit() {
     static const std::vector netType = {
+        ElementType::f16,
         ElementType::i32,
-        ElementType::f32};
+        ElementType::f32,
+        ElementType::i8,
+        ElementType::u8};
     return netType;
 }
 