From cda5a0239f5f661dc729cc2958fd61884c04082a Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Mon, 25 Mar 2024 11:22:00 +0100 Subject: [PATCH] [CPU][ARM] Enable both f16 and f32 kernels for aarch64 and introduce runtime f16 support check (#22992) Inherited from https://github.com/openvinotoolkit/openvino/pull/22437 --------- Co-authored-by: Ilya Lavrenov --- .github/workflows/linux_arm64.yml | 1 + .gitignore | 1 + src/plugins/intel_cpu/CMakeLists.txt | 26 +++++++++++++++--- src/plugins/intel_cpu/src/config.cpp | 14 ++++------ src/plugins/intel_cpu/src/graph.cpp | 11 ++++---- .../src/nodes/executors/acl/acl_eltwise.cpp | 27 +++++++++++++++++-- src/plugins/intel_cpu/src/nodes/reorder.cpp | 10 +++---- src/plugins/intel_cpu/src/nodes/reorder.h | 5 +--- .../transformation_pipeline.cpp | 10 ++++--- .../intel_cpu/src/utils/precision_support.cpp | 12 +++++++-- .../intel_cpu/tests/functional/CMakeLists.txt | 26 +++++++++++++++--- .../custom/behavior/ov_plugin/properties.cpp | 5 ++-- .../shared_tests_instances/core_config.cpp | 3 +-- .../skip_tests_config.cpp | 19 +++++++------ .../intel_cpu/thirdparty/ACLConfig.cmake | 17 +++++------- 15 files changed, 122 insertions(+), 65 deletions(-) diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 12bc0f8f84106e..32a2147a2b2433 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -172,6 +172,7 @@ jobs: -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ + -DOV_CPU_AARCH64_USE_MULTI_ISA=OFF \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} diff --git a/.gitignore b/.gitignore index 9bc1e79b3e53b1..9dd22697d3780a 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,5 @@ __pycache__ /tools/mo/*.svg /src/plugins/intel_cpu/tools/commit_slider/*.json /src/plugins/intel_cpu/tools/commit_slider/slider_cache/* 
+/src/plugins/intel_cpu/thirdparty/ComputeLibrary/build/* .github/GITHUB_OUTPUT diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 70da87819f03e5..c65bceae2a1d0b 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -30,6 +30,16 @@ elseif(OV_COMPILER_IS_CLANG) endif() endif() +if (AARCH64 AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2) + # according to https://github.com/ARM-software/ComputeLibrary/issues/1053#issuecomment-1846903707 comment + # the 'multi_isa=1' below enables FP32, FP16 and SVE / SVE2 kernels + # But: arm_sve.h header is not available on gcc older than 10.2 (let's test it), so we have to check it + set(OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT ON) +else() + set(OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT OFF) +endif() +set(OV_CPU_AARCH64_USE_MULTI_ISA ${OV_CPU_AARCH64_USE_MULTI_ISA_DEFAULT} CACHE BOOL "Build multi-ISA ACL") + set(OV_CPU_ARM_TARGET_GENERIC_ARCHS armv8a armv8.2-a armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2 @@ -41,7 +51,18 @@ if(ARM) # requires estate=32 ${OV_CPU_ARM_TARGET_GENERIC_ARCHS}) elseif(AARCH64) - set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) + if(APPLE) + set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) + else() + if(OV_CPU_AARCH64_USE_MULTI_ISA) + # set v8a even though we want fp16 kernels, because + # we use multi_isa=1 in ACLConfig.cmake to enable both fp16 and fp32 kernels + # actual kernel is selected at runtime based on runtime capabilities + set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8a) + else() + set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a) + endif() + endif() set(OV_CPU_ARM_TARGET_ARCHS arm64-v8a arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2 # used with estate=64 @@ -49,9 +70,6 @@ elseif(AARCH64) endif() set(OV_CPU_ARM_TARGET_ARCH ${OV_CPU_ARM_TARGET_ARCH_DEFAULT} CACHE STRING "Architecture for ARM ComputeLibrary") set_property(CACHE OV_CPU_ARM_TARGET_ARCH PROPERTY STRINGS 
${OV_CPU_ARM_TARGET_ARCHS}) -if(OV_CPU_ARM_TARGET_ARCH MATCHES "(armv|arm64-v)[8-9]\\.") - add_definitions(-DOV_CPU_ARM_ENABLE_FP16) -endif() if(X86 OR X86_64 OR AARCH64) # disable mlas with webassembly diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 8567914415e459..4d94abf72ebfc0 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -284,14 +284,9 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { inferencePrecision = ov::element::bf16; } } else if (prec == ov::element::f16) { -#if defined(OPENVINO_ARCH_X86_64) if (hasHardwareSupport(ov::element::f16)) { inferencePrecision = ov::element::f16; } -#elif defined(OV_CPU_ARM_ENABLE_FP16) - // TODO: add runtime FP16 feature support check for ARM - inferencePrecision = ov::element::f16; -#endif } else if (prec == ov::element::f32) { inferencePrecision = ov::element::f32; } else { @@ -382,12 +377,13 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) { if (!inferencePrecisionSetExplicitly) { if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) { inferencePrecision = ov::element::f32; -#if defined(OV_CPU_ARM_ENABLE_FP16) - inferencePrecision = ov::element::f16; -#else +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + if (hasHardwareSupport(ov::element::f16)) { + inferencePrecision = ov::element::f16; + } +#endif if (mayiuse(avx512_core_bf16)) inferencePrecision = ov::element::bf16; -#endif } else { inferencePrecision = ov::element::f32; } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 3ffff01c6da6e7..ec9f4012ce53e0 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -37,11 +37,10 @@ #include "utils/ngraph_utils.hpp" #include "utils/node_dumper.h" #include "utils/verbose.h" +#include "utils/precision_support.h" #include -#if defined(OV_CPU_ARM_ENABLE_FP16) #include 
"common/primitive_desc_iface.hpp" -#endif #include "openvino/runtime/memory_solver.hpp" @@ -425,10 +424,12 @@ static bool isReorderAvailable(const MemoryDescPtr& parentDesc, const MemoryDesc dnnl_primitive_desc_t result = nullptr; auto status = dnnl_reorder_primitive_desc_create(&result, srcMemDesc.get(), eng.get(), dstMemDesc.get(), eng.get(), attr.get()); -#if defined(OV_CPU_ARM_ENABLE_FP16) +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) // temporary WA for slow FP32->FP16 conversion reorder in oneDNN on ARM // pretend the reorder is not available to use Convert node instead - if (result && parse_impl_name(result->impl()->name()) == ref_any) { + if (hasHardwareSupport(ov::element::f16) && + result && + parse_impl_name(result->impl()->name()) == ref_any) { dnnl_primitive_desc_destroy(result); return false; } @@ -1607,7 +1608,7 @@ void Graph::EnforceInferencePrecision() { if (inferPrec == ov::element::f32) return; // nothing to do, only precision reduction is currently allowed -#if defined(OV_CPU_ARM_ENABLE_FP16) +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) if (inferPrec == ov::element::f16) return; // precision of configured by ov::pass::ConvertPrecision #endif diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index cdc038fbf9155d..ae091deed57121 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -4,6 +4,7 @@ #include "acl_eltwise.hpp" #include "acl_utils.hpp" +#include "utils/debug_capabilities.h" namespace ov { namespace intel_cpu { @@ -31,6 +32,17 @@ inline VectorDims reshape_sizes(VectorDims dims) { return result_dims; } +inline void log_unsupported_prec(const std::vector& srcDescs, + const std::vector& dstDescs, + const Algorithm eltwiseAlgorithm) { + std::string srcPrec; + for (size_t i = 0; i < srcDescs.size(); i++) { + srcPrec += 
srcDescs[i]->getPrecision().to_string() + " "; + } + DEBUG_LOG(algToString(eltwiseAlgorithm), ": provided combination of src precisions: [", srcPrec, + "] and dst precision: ", dstDescs[0]->getPrecision().to_string(), " is not supported"); +} + bool AclEltwiseExecutor::isEltwiseAlgorithmSupported(Algorithm algorithm) { if (one_of(algorithm, Algorithm::EltwiseSqrt, Algorithm::EltwiseDivide, @@ -94,6 +106,7 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, case Algorithm::EltwiseHswish: if (!(checkPrecision({ov::element::f16, ov::element::f16}, ov::element::f16) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::f32))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; } break; @@ -103,6 +116,7 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, if (!(checkPrecision({ov::element::i32, ov::element::i32}, ov::element::i32) || checkPrecision({ov::element::f16, ov::element::f16}, ov::element::f16) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::f32))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; } break; @@ -113,6 +127,7 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, checkPrecision({ov::element::i32, ov::element::i32}, ov::element::i32) || checkPrecision({ov::element::f16, ov::element::f16}, ov::element::f16) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::f32))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; } break; @@ -123,6 +138,7 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, checkPrecision({ov::element::i32, ov::element::i32}, ov::element::i32) || checkPrecision({ov::element::f16, ov::element::f16}, ov::element::f16) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::f32))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; 
} break; @@ -134,6 +150,7 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, checkPrecision({ov::element::i16, ov::element::i16}, ov::element::i16) || checkPrecision({ov::element::f16, ov::element::f16}, ov::element::f16) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::f32))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; } break; @@ -149,20 +166,26 @@ bool AclEltwiseExecutorBuilder::isSupported(const EltwiseAttrs& eltwiseAttrs, checkPrecision({ov::element::i32, ov::element::i32}, ov::element::u8) || checkPrecision({ov::element::f16, ov::element::f16}, ov::element::u8) || checkPrecision({ov::element::f32, ov::element::f32}, ov::element::u8))) { + log_unsupported_prec(srcDescs, dstDescs, eltwiseAttrs.algorithm); return false; } break; default: + DEBUG_LOG("Eltwise algorithm ", algToString(eltwiseAttrs.algorithm), " is not supported"); return false; } for (const auto & srcDesc : srcDescs) { - if (getAclDataLayoutByMemoryDesc(srcDesc) == arm_compute::DataLayout::UNKNOWN) + if (getAclDataLayoutByMemoryDesc(srcDesc) == arm_compute::DataLayout::UNKNOWN) { + DEBUG_LOG("src descriptor layout is unsupported by ACL: ", srcDesc->serializeFormat()); return false; + } } for (const auto & dstDesc : dstDescs) { - if (getAclDataLayoutByMemoryDesc(dstDesc) == arm_compute::DataLayout::UNKNOWN) + if (getAclDataLayoutByMemoryDesc(dstDesc) == arm_compute::DataLayout::UNKNOWN) { + DEBUG_LOG("dst descriptor layout is unsupported by ACL: ", dstDesc->serializeFormat()); return false; + } } return true; diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index a0d3f101f573a3..e1db1540fdd4b5 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -24,11 +24,9 @@ #include "nodes/common/reorder_prim.h" #include "openvino/core/parallel.hpp" #include "shape_inference/shape_inference_pass_through.hpp" - 
-#if defined(OV_CPU_ARM_ENABLE_FP16) +#include "utils/precision_support.h" #include "nodes/executors/executor.hpp" #include "nodes/executors/transpose_list.hpp" -#endif namespace ov { namespace intel_cpu { @@ -128,7 +126,6 @@ void Reorder::executeDynamicImpl(dnnl::stream strm) { execute(strm); } -#if defined(OV_CPU_ARM_ENABLE_FP16) void Reorder::prepareReorderAsTranspose(MemoryDescPtr parentDesc, MemoryDescPtr childDesc) { auto getOrderAndBlockedDims = [](const MemoryDesc& lhs, const MemoryDesc& rhs) -> std::pair, std::vector> { const auto& in = lhs.as()->getBlockDims(); @@ -180,7 +177,6 @@ void Reorder::prepareReorderAsTranspose(MemoryDescPtr parentDesc, MemoryDescPtr getSelectedPrimitiveDescriptor()->setImplementationType(transposeExecutor->implType()); return; } -#endif // OV_CPU_ARM_ENABLE_FP16 void Reorder::prepareParams() { if (isOptimized) @@ -211,7 +207,7 @@ void Reorder::prepareParams() { const auto& parentDesc = srcMemPtr->getDescPtr(); const auto& childDesc = dstMemPtr->getDescPtr(); -#if defined(OV_CPU_ARM_ENABLE_FP16) +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) // @todo current oneDNN v3.2 lacks optimized jit implementation for fp16 reorders. // Use transpose executor as a temporary WA. 
if (everyone_is(ov::element::f16, parentDesc->getPrecision(), childDesc->getPrecision()) && @@ -405,7 +401,7 @@ void Reorder::optimizedNspc2Ncsp() { } void Reorder::execute(dnnl::stream strm) { -#if defined(OV_CPU_ARM_ENABLE_FP16) +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) if (transposeExecutor) { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index 07a7b7b53230be..cb99caa07bdfa6 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -6,9 +6,7 @@ #include -#if defined(OV_CPU_ARM_ENABLE_FP16) #include "nodes/executors/transpose.hpp" -#endif namespace ov { namespace intel_cpu { @@ -76,10 +74,9 @@ class Reorder : public Node { void optimizedNspc2Ncsp(); void optimizedNcsp2Nspc(); void createReorderPrimitive(const dnnl::memory::desc &srcDesc, void* srcPtr, const dnnl::memory::desc &dstDesc, void* dstPtr); -#if defined(OV_CPU_ARM_ENABLE_FP16) + void prepareReorderAsTranspose(MemoryDescPtr parentDesc, MemoryDescPtr childDesc); TransposeExecutorPtr transposeExecutor; -#endif }; } // namespace node diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 8dbdd42cee0726..cdea46e202b1cd 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -318,9 +318,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? 
if (!hasHardwareSupport(ov::element::bf16)) map.insert({ov::element::bf16, ov::element::f32}); -#if defined(OV_CPU_ARM_ENABLE_FP16) - if (inferencePrecision != ov::element::f16) - map.insert({ov::element::f16, ov::element::f32}); +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + if (inferencePrecision != ov::element::f16) { + map.insert({ov::element::f16, ov::element::f32}); + } #else map.insert({ov::element::f16, ov::element::f32}); #endif @@ -329,11 +330,12 @@ void Transformations::PreLpt(const std::vector& defaultPrecis type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}}; -#if defined(OV_CPU_ARM_ENABLE_FP16) +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) // It cannot be static data, because it may be difference for different inferencePrecision const auto precisions = get_convert_precisions(); if (inferencePrecision == ov::element::f16) { precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}}; + //keep fq nodes in f32 prec to avoid performance degradation type_to_fuse_map f16_fuse_map = {{ov::opset1::FakeQuantize::get_type_info_static(), fuse_type_to_fq}}; const bool keep_precision_sensitive_in_fp32 = true; CPU_REGISTER_PASS_COMMON(manager, diff --git a/src/plugins/intel_cpu/src/utils/precision_support.cpp b/src/plugins/intel_cpu/src/utils/precision_support.cpp index 4a89002e63da48..e2e55a4d0f6cca 100644 --- a/src/plugins/intel_cpu/src/utils/precision_support.cpp +++ b/src/plugins/intel_cpu/src/utils/precision_support.cpp @@ -4,10 +4,16 @@ #include "precision_support.h" +#if defined(OPENVINO_ARCH_X86_64) #include "cpu/x64/cpu_isa_traits.hpp" +#endif #include "openvino/core/type/element_type.hpp" #include "openvino/core/visibility.hpp" +#if defined(OV_CPU_WITH_ACL) +#include "arm_compute/core/CPP/CPPTypes.h" +#endif + namespace ov { namespace intel_cpu { @@ -17,8 +23,10 @@ static bool hasFP16HardwareSupport(const ov::element::Type& precision) { 
dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) return true; return false; -#elif defined(OV_CPU_ARM_ENABLE_FP16) - return true; // @todo add runtime check for arm as well +#elif defined(OPENVINO_ARCH_ARM64) && defined(OV_CPU_WITH_ACL) + //has_fp16() works correctly on aarch64 only + //TODO: remove else branch as soon as ACL issue #1096 is fixed + return arm_compute::CPUInfo::get().has_fp16(); #else return false; #endif diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 3a58130da3463e..8e32bc3ec059b6 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -4,14 +4,32 @@ set(TARGET_NAME ov_cpu_func_tests) -add_library(cpuSpecificRtInfo STATIC +if(SUGGEST_OVERRIDE_SUPPORTED) + # xbyak compilation fails + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-suggest-override") +endif() + +add_library(cpuUtils STATIC $/src/utils/rt_info/memory_formats_attribute.hpp - $/src/utils/rt_info/memory_formats_attribute.cpp) -target_link_libraries(cpuSpecificRtInfo PRIVATE openvino::runtime) + $/src/utils/rt_info/memory_formats_attribute.cpp + $/src/utils/precision_support.h + $/src/utils/precision_support.cpp) +set(CPU_UTILS_LINK_LIBRARIES openvino::runtime) +set(CPU_UTILS_INCLUDE_PATHS) +if(OV_CPU_WITH_ACL) + list(APPEND CPU_UTILS_LINK_LIBRARIES arm_compute::arm_compute) + list(APPEND CPU_UTILS_INCLUDE_PATHS $) +endif() +if(OV_CPU_WITH_DNNL) + list(APPEND CPU_UTILS_LINK_LIBRARIES dnnl) + list(APPEND CPU_UTILS_INCLUDE_PATHS $/thirdparty/onednn/src) +endif() +target_link_libraries(cpuUtils PRIVATE ${CPU_UTILS_LINK_LIBRARIES}) +target_include_directories(cpuUtils PUBLIC ${CPU_UTILS_INCLUDE_PATHS}) set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $/src) set(DEPENDENCIES openvino_intel_cpu_plugin openvino_template_extension) -set(LINK_LIBRARIES funcSharedTests cpuSpecificRtInfo openvino::snippets ov_snippets_models) 
+set(LINK_LIBRARIES funcSharedTests cpuUtils openvino::snippets ov_snippets_models) if(ENABLE_OV_ONNX_FRONTEND) list(APPEND DEFINES TEST_MODELS="${TEST_MODEL_ZOO}") diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp index 1b29347d6c0605..27c80bce3fc1a0 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp @@ -5,6 +5,7 @@ #include #include +#include "utils/precision_support.h" #include "utils/properties_test.hpp" #include "common_test_utils/test_assertions.hpp" #include "openvino/runtime/properties.hpp" @@ -208,8 +209,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) { ASSERT_EQ(false, value); } -#if defined(OV_CPU_ARM_ENABLE_FP16) - const auto expected_precision_for_performance_mode = ov::element::f16; +#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) + const auto expected_precision_for_performance_mode = ov::intel_cpu::hasHardwareSupport(ov::element::f16) ? ov::element::f16 : ov::element::f32; #else const auto expected_precision_for_performance_mode = ov::with_cpu_x86_bfloat16() ? 
ov::element::bf16 : ov::element::f32; #endif diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp index 9dbdd255263b35..d2edd5a14eceb4 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp @@ -8,12 +8,11 @@ namespace ov { namespace test { void core_configuration(ov::test::SubgraphBaseTest* test) { - #if defined(OV_CPU_ARM_ENABLE_FP16) || defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) //force fp32 inference precision if it is not configured specially if (!test->configuration.count(ov::hint::inference_precision.name())) { test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()}); } - #endif + // todo: issue: 123320 test->convert_precisions.insert({ov::element::bf16, ov::element::f32}); test->convert_precisions.insert({ov::element::f16, ov::element::f32}); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 6ad35a8105d7b1..485c9cb5bd615a 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -5,6 +5,7 @@ #include "openvino/core/visibility.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "openvino/runtime/system_conf.hpp" +#include "utils/precision_support.h" #include #include @@ -337,7 +338,6 @@ std::vector disabledTestPatterns() { // int8 specific retVector.emplace_back(R"(smoke_Quantized.*)"); -# if defined(OV_CPU_ARM_ENABLE_FP16) // Issue: 123019 retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); 
retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); @@ -351,7 +351,6 @@ std::vector disabledTestPatterns() { // Issue: 124395 retVector.emplace_back(R"(smoke_VariableStateBasic/InferRequestVariableStateTest.*)"); retVector.emplace_back(R"(smoke_VariableState/OVInferRequestVariableStateTest.*)"); -# endif #endif @@ -416,14 +415,14 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); } #elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM) -# if !defined(OV_CPU_ARM_ENABLE_FP16) - // Skip fp16 tests for paltforms that don't support fp16 precision - retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); -# else - // Issue 117407 - retVector.emplace_back( - R"(.*EltwiseLayerCPUTest.*IS=\(\[1\.\.10\.2\.5\.6\]_\).*eltwiseOpType=SqDiff.*_configItem=INFERENCE_PRECISION_HINT=f16.*)"); -# endif // OV_CPU_ARM_ENABLE_FP16 + if (!ov::intel_cpu::hasHardwareSupport(ov::element::f16)) { + // Skip fp16 tests for platforms that don't support fp16 precision + retVector.emplace_back(R"(.*INFERENCE_PRECISION_HINT=(F|f)16.*)"); + } else { + // Issue 117407 + retVector.emplace_back( + R"(.*EltwiseLayerCPUTest.*IS=\(\[1\.\.10\.2\.5\.6\]_\).*eltwiseOpType=SqDiff.*_configItem=INFERENCE_PRECISION_HINT=f16.*)"); + } #endif if (!ov::with_cpu_x86_avx512_core_vnni() && !ov::with_cpu_x86_avx512_core_amx_int8()) { // MatMul in Snippets uses BRGEMM that supports i8 only on platforms with VNNI or AMX instructions diff --git a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake index 3afbae622af835..09774aa4bec493 100644 --- a/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake +++ b/src/plugins/intel_cpu/thirdparty/ACLConfig.cmake @@ -98,7 +98,6 @@ elseif(NOT TARGET arm_compute::arm_compute) # set(ARM_COMPUTE_SOURCE_DIR "${intel_cpu_thirdparty_SOURCE_DIR}/ComputeLibrary") - set(ARM_COMPUTE_BINARY_DIR 
"${intel_cpu_thirdparty_BINARY_DIR}/ComputeLibrary") message(STATUS "Configure to build ${ARM_COMPUTE_SOURCE_DIR}") @@ -149,17 +148,16 @@ elseif(NOT TARGET arm_compute::arm_compute) list(APPEND ARM_COMPUTE_OPTIONS estate=32) else() list(APPEND ARM_COMPUTE_OPTIONS estate=64) - if(NOT APPLE AND CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2) - # arm_sve.h header is not available on gcc older 10.2 - # TODO: validate it on machines with FP16 / SVE support and enabled back - # list(APPEND ARM_COMPUTE_OPTIONS multi_isa=1) + if(OV_CPU_AARCH64_USE_MULTI_ISA) + list(APPEND ARM_COMPUTE_OPTIONS multi_isa=1) + # let's additionally enable SME as well + set(extra_cxx_flags "${extra_cxx_flags} -DENABLE_SME -DARM_COMPUTE_ENABLE_SME -DARM_COMPUTE_ENABLE_SME2") endif() endif() if(NOT MSVC64) list(APPEND ARM_COMPUTE_OPTIONS - build_dir=${ARM_COMPUTE_BINARY_DIR} - install_dir=${ARM_COMPUTE_BINARY_DIR}/install) + install_dir=install) endif() if(ARM_COMPUTE_SCONS_JOBS) @@ -329,11 +327,10 @@ elseif(NOT TARGET arm_compute::arm_compute) if(MSVC64) set(arm_compute build/arm_compute-static.lib) - set(arm_compute_full_path "${ARM_COMPUTE_SOURCE_DIR}/${arm_compute}") else() - set(arm_compute ${ARM_COMPUTE_BINARY_DIR}/libarm_compute-static.a) - set(arm_compute_full_path "${arm_compute}") + set(arm_compute build/libarm_compute-static.a) endif() + set(arm_compute_full_path "${ARM_COMPUTE_SOURCE_DIR}/${arm_compute}") list(APPEND ARM_COMPUTE_OPTIONS fixed_format_kernels=True)