Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] [ARM64] jit eltwise: int8 support #22687

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/src/op/divide.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ bool Divide::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
this,
outputs,
inputs,
OV_PP_ET_LIST(f32, i32, i64, u32, u64),
OV_PP_ET_LIST(f32, i8, i32, i64, u8, u32, u64),
praasz marked this conversation as resolved.
Show resolved Hide resolved
divide::Evaluate,
inputs[0].get_element_type(),
inputs[0],
Expand Down
2 changes: 1 addition & 1 deletion src/core/src/op/multiply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ bool Multiply::evaluate(TensorVector& outputs, const TensorVector& inputs) const
this,
outputs,
inputs,
OV_PP_ET_LIST(f32, f64, i32, i64, u32, u64),
OV_PP_ET_LIST(f32, f64, i8, i32, i64, u8, u32, u64),
multiply::Evaluate,
inputs[0].get_element_type(),
inputs[0],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@ bool JitEltwiseExecutor::isSupported(
// The Divide operation doesn't support int32 tensor inference in fp32 precision.
// As a result, the Divide operation supports fp16 and fp32 only.
std::set<ov::element::Type> { ov::element::f16, ov::element::f32 } :
std::set<ov::element::Type> { ov::element::f16, ov::element::f32, ov::element::i32 };
std::set<ov::element::Type> {
ov::element::f16,
ov::element::f32,
ov::element::i32,
ov::element::i8,
ov::element::u8
};

if (!check_precisions(input_precisions, output_precisions, supported_precisions)) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,32 @@ void jit_uni_eltwise_generic<isa>::generate() {
}
}

namespace utils {
// Emits a vector load from [ptr_reg + offset] through the JIT generator 'h'.
// When 'broadcast' is set, a single lane is loaded and replicated across all
// lanes (ld1r on 'data_lane'); otherwise every lane is loaded (ld1 on
// 'data_lanes'). Since ld1/ld1r take a plain base register, a non-zero offset
// is first materialized into the generator's scratch address register.
template <typename T1, typename T2>
void load_vector(const T1& data_lane,
                 const T2& data_lanes,
                 const Xbyak_aarch64::XReg &ptr_reg,
                 const int64_t offset,
                 const bool broadcast,
                 jit_generator* h) {
    const bool has_offset = (offset != 0);
    if (has_offset) {
        // Fold the immediate offset into X_DEFAULT_ADDR (X_TMP_0 is scratch).
        h->add_imm(h->X_DEFAULT_ADDR, ptr_reg, offset, h->X_TMP_0);
    }
    const Xbyak_aarch64::XReg& base = has_offset ? h->X_DEFAULT_ADDR : ptr_reg;
    if (broadcast) {
        h->ld1r(data_lane, ptr(base));
    } else {
        h->ld1(data_lanes, ptr(base));
    }
}
} // namespace utils

template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
void jit_uni_eltwise_generic<isa>::load_vector(const TReg& data,
const XReg& ptr_reg,
Expand All @@ -281,16 +307,7 @@ void jit_uni_eltwise_generic<isa>::load_vector(const TReg& data,
const int32_t ptr_offset) {
switch (src_prc) {
case ov::element::f16: {
if (broadcast) {
if (ptr_offset == 0) {
ld1r(data.h, ptr(ptr_reg));
} else {
add_imm(ptr_reg, ptr_reg, ptr_offset, X_DEFAULT_ADDR);
ld1r(data.h, ptr(ptr_reg));
}
} else {
ldr(Xbyak_aarch64::DReg(data.getIdx()), Xbyak_aarch64::ptr(ptr_reg, ptr_offset));
}
utils::load_vector(data.h, data.h4, ptr_reg, ptr_offset, broadcast, this);
break;
}
case ov::element::f32:
Expand All @@ -302,6 +319,18 @@ void jit_uni_eltwise_generic<isa>::load_vector(const TReg& data,
}
break;
}
case ov::element::i8: {
utils::load_vector(data.b, data.s, ptr_reg, ptr_offset, broadcast, this);
sshll(data.h8, data.b8, 0);
sshll(data.s4, data.h4, 0);
break;
}
case ov::element::u8: {
utils::load_vector(data.b, data.s, ptr_reg, ptr_offset, broadcast, this);
ushll(data.h8, data.b8, 0);
ushll(data.s4, data.h4, 0);
break;
}
default: {
OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string());
}
Expand All @@ -319,6 +348,14 @@ void jit_uni_eltwise_generic<isa>::load_vector(const TReg& data,
scvtf(data.s, data.s);
break;
}
case ov::element::i8: {
scvtf(data.s, data.s);
break;
}
case ov::element::u8: {
ucvtf(data.s, data.s);
break;
}
default:
OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string());
}
Expand All @@ -345,6 +382,24 @@ void jit_uni_eltwise_generic<isa>::load_scalar(const SReg& data,
ldr(data, Xbyak_aarch64::ptr(ptr, ptr_offset));
break;
}
case ov::element::i8: {
ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));

// the scalar has been loaded; the widening ops below operate on the vector view of the same register
TReg vec(data.getIdx());
dmitry-gorokhov marked this conversation as resolved.
Show resolved Hide resolved
sshll(vec.h8, vec.b8, 0);
sshll(vec.s4, vec.h4, 0);
break;
}
case ov::element::u8: {
ldr(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));

// the scalar has been loaded; the widening ops below operate on the vector view of the same register
TReg vec(data.getIdx());
ushll(vec.h8, vec.b8, 0);
ushll(vec.s4, vec.h4, 0);
break;
}
default: {
OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string());
}
Expand All @@ -358,10 +413,15 @@ void jit_uni_eltwise_generic<isa>::load_scalar(const SReg& data,
fcvt(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::HReg(data.getIdx()));
break;
}
case ov::element::i32: {
case ov::element::i32:
case ov::element::i8: {
scvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx()));
break;
}
case ov::element::u8: {
ucvtf(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::SReg(data.getIdx()));
break;
}
default:
OPENVINO_THROW("src_prc " + src_prc.to_string() + " is not supported, dst_prc is " + dst_prc.to_string());
}
Expand Down Expand Up @@ -390,6 +450,18 @@ void jit_uni_eltwise_generic<isa>::store_vector(const XReg& ptr,
fcvtns(data.s, data.s);
break;
}
case ov::element::i8: {
fcvtns(data.s, data.s);
xtn(data.h4, data.s4);
xtn(data.b8, data.h8);
break;
}
case ov::element::u8: {
fcvtnu(data.s, data.s);
xtn(data.h4, data.s4);
xtn(data.b8, data.h8);
break;
}
default: {
OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string());
}
Expand All @@ -412,6 +484,11 @@ void jit_uni_eltwise_generic<isa>::store_vector(const XReg& ptr,
str(Xbyak_aarch64::QReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
break;
}
case ov::element::i8:
case ov::element::u8: {
str(Xbyak_aarch64::SReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
break;
}
default: {
OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_ptr is " + src_prc.to_string());
}
Expand All @@ -436,6 +513,20 @@ void jit_uni_eltwise_generic<isa>::store_scalar(const XReg& ptr,
fcvtns(data, data);
break;
}
case ov::element::i8: {
TReg vec_data(data.getIdx());
fcvtns(vec_data.s, vec_data.s);
xtn(vec_data.h4, vec_data.s4);
xtn(vec_data.b8, vec_data.h8);
break;
}
case ov::element::u8: {
TReg vec_data(data.getIdx());
fcvtnu(vec_data.s, vec_data.s);
xtn(vec_data.h4, vec_data.s4);
xtn(vec_data.b8, vec_data.h8);
break;
}
default: {
OPENVINO_THROW("dst_prc " + dst_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string());
}
Expand All @@ -458,6 +549,11 @@ void jit_uni_eltwise_generic<isa>::store_scalar(const XReg& ptr,
str(data, Xbyak_aarch64::ptr(ptr, ptr_offset));
break;
}
case ov::element::i8:
case ov::element::u8: {
str(Xbyak_aarch64::BReg(data.getIdx()), Xbyak_aarch64::ptr(ptr, ptr_offset));
break;
}
default: {
OPENVINO_THROW("dst_prc " + src_prc.to_string() + " is not supported, src_prc is " + src_prc.to_string());
}
Expand Down
8 changes: 5 additions & 3 deletions src/plugins/intel_cpu/tests/functional/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ else()
file(GLOB_RECURSE TMP_LIST_OF_TEST_CLASSES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/classes/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_COMMON_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/common/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_ARM_TEST_INSTANCES ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/arm/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/arm/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_ARM_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/arm/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_COMMON_SUBGRAPH_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common/*.cpp)
file(GLOB_RECURSE TMP_LIST_OF_SUBGRAPH_TEST_CLASSES ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/classes/*.*)

list(APPEND TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS
${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS})
${TMP_LIST_OF_TEST_CLASSES} ${TMP_LIST_OF_COMMON_TEST_INSTANCES} ${TMP_LIST_OF_ARM_TEST_INSTANCES} ${TMP_LIST_OF_ARM_SUBGRAPH_TESTS} ${TMP_LIST_OF_COMMON_SUBGRAPH_TESTS} ${TMP_LIST_OF_SUBGRAPH_TEST_CLASSES})
set(TMP_EXPLICITLY_ENABLED_TESTS "${TMP_LIST_OF_EXPLICITLY_ENABLED_TESTS}")
endif()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ std::string EltwiseLayerCPUTest::getTestCaseName(testing::TestParamInfo<EltwiseL
return result.str();
}

// If adopt_intervals is true then:
// 1) the generated tensor values are restricted so that the operation result (especially for multiply)
//    stays within the signed/unsigned int8 range,
// 2) the start of the interval depends on the type's signedness: for signed int8 the interval is
//    symmetric around zero.
ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type& type, const ov::Shape& shape, const bool adopt_intervals) {
struct gen_params {
uint32_t range;
Expand Down Expand Up @@ -66,10 +70,18 @@ ov::Tensor EltwiseLayerCPUTest::generate_eltwise_input(const ov::element::Type&
} else {
switch (type) {
case ov::element::i8:
params = gen_params(INT8_MAX, INT8_MIN);
if (adopt_intervals) {
params = gen_params(11 * 2, -11);
dmitry-gorokhov marked this conversation as resolved.
Show resolved Hide resolved
} else {
params = gen_params(INT8_MAX, INT8_MIN);
}
break;
case ov::element::u8:
params = gen_params(UINT8_MAX, 0);
if (adopt_intervals) {
params = gen_params(15, 0);
} else {
params = gen_params(UINT8_MAX, 0);
}
break;
case ov::element::i16:
params = gen_params(INT16_MAX, INT16_MIN);
Expand Down Expand Up @@ -109,7 +121,8 @@ void EltwiseLayerCPUTest::generate_inputs(const std::vector<ov::Shape>& targetIn
inputs.insert({funcInput.get_node_shared_ptr(), generate_eltwise_input(
funcInput.get_element_type(),
targetInputStaticShapes[i],
(funcInput.get_element_type() == element::i32) || (funcInput.get_element_type() == element::u32))});
(funcInput.get_element_type() == element::i32) || (funcInput.get_element_type() == element::u32) ||
(funcInput.get_element_type() == element::i8) || (funcInput.get_element_type() == element::u8))});
}
}

Expand Down Expand Up @@ -199,7 +212,11 @@ void EltwiseLayerCPUTest::SetUp() {
}
}

auto data_tensor = generate_eltwise_input(netType, shape, (netType == element::i32) || (netType == element::u32));
auto data_tensor = generate_eltwise_input(
netType,
shape,
(netType == element::i32) || (netType == element::u32) ||
(netType == element::i8) || (netType == element::u8));
if ((netType == ElementType::i8) || (netType == ElementType::u8)) {
auto data_ptr = reinterpret_cast<uint8_t*>(data_tensor.data());
std::vector<uint8_t> data(data_ptr, data_ptr + ov::shape_size(shape));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ const auto params_4D_int_jit = ::testing::Combine(
::testing::ValuesIn({ utils::EltwiseTypes::ADD, utils::EltwiseTypes::MULTIPLY }),
::testing::ValuesIn(secondaryInputTypes()),
::testing::ValuesIn(opTypes()),
::testing::ValuesIn({ ElementType::i32, ElementType::f32 }),
::testing::ValuesIn({ ElementType::i8, ElementType::u8, ElementType::f16, ElementType::i32, ElementType::f32 }),
::testing::Values(ov::element::undefined),
::testing::Values(ov::element::undefined),
::testing::Values(ov::test::utils::DEVICE_CPU),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <tuple>
#include <string>
#include <vector>

#include "custom/subgraph_tests/src/classes/eltwise_chain.hpp"

#include "shared_test_classes/base/ov_subgraph.hpp"
#include "common_test_utils/node_builders/constant.hpp"
#include "common_test_utils/node_builders/eltwise.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"

using namespace CPUTestUtils;

namespace ov {
namespace test {
using namespace ov::test::utils;
using namespace ov::test::eltwise_chain;

namespace {

std::vector<std::vector<EltwiseTypes>> eltwiseOpsConvertInt8 = {
{ EltwiseTypes::MULTIPLY },
{ EltwiseTypes::ADD },
{ EltwiseTypes::DIVIDE }
};

INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_MergeConvert_int8, EltwiseChainTest,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given x64 Eltwise impl also supports Convert fusion why not to make this instance common?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done, thanks

Copy link
Contributor Author

@eshoguli eshoguli Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rolled back, new issue was created: CVS-135542

::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesConvert())),
::testing::Values(InputLayerType::CONSTANT),
::testing::ValuesIn(inputPrecisionsConvert()),
::testing::ValuesIn(eltwiseOpsConvertInt8),
::testing::Values(false),
::testing::ValuesIn({ov::element::i8, ov::element::u8}),
::testing::Values(ov::test::utils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);

} // namespace
} // namespace test
} // namespace ov
Loading
Loading