Skip to content

Commit

Permalink
[ARM] Added ARM32 ACL kernels calls
Browse files Browse the repository at this point in the history
3.5 squash list:
[FORK][FIX] ARM changes for oneDNN 3.5 upgrade
  • Loading branch information
luweizhou2016 committed Jul 29, 2024
1 parent 7cf9cbe commit ba12d02
Show file tree
Hide file tree
Showing 75 changed files with 349 additions and 315 deletions.
2 changes: 1 addition & 1 deletion .github/automation/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ CMAKE_OPTIONS="${CMAKE_OPTIONS}
# NOTE: only for AArch64 builds.
if [ ! -z ${ACL_DIR} ]; then
export ACL_ROOT_DIR=$ACL_DIR
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DDNNL_AARCH64_USE_ACL=ON"
CMAKE_OPTIONS="${CMAKE_OPTIONS} -DDNNL_USE_ACL=ON"
echo "Info: Building with Arm Compute Library backend for Aarch64..."
fi

Expand Down
4 changes: 2 additions & 2 deletions cmake/ACL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if(NOT DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
return()
endif()

if(NOT DNNL_AARCH64_USE_ACL)
if(NOT DNNL_USE_ACL)
return()
endif()

Expand Down Expand Up @@ -67,7 +67,7 @@ if(ACL_FOUND)
message(STATUS "Arm Compute Library: ${ACL_LIBRARIES}")
message(STATUS "Arm Compute Library headers: ${ACL_INCLUDE_DIRS}")

add_definitions(-DDNNL_AARCH64_USE_ACL)
add_definitions(-DDNNL_USE_ACL)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS "OFF")
endif()
2 changes: 1 addition & 1 deletion cmake/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ set(DNNL_BLAS_VENDOR "NONE" CACHE STRING
# AArch64 optimizations with Arm Compute Library
# ==============================================

option(DNNL_AARCH64_USE_ACL "Enables use of AArch64 optimised functions
option(DNNL_USE_ACL "Enables use of ARM optimised functions
from Arm Compute Library.
This is only supported on AArch64 builds and assumes there is a
functioning Compute Library build available at the location specified by the
Expand Down
2 changes: 1 addition & 1 deletion doc/build/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ make -j
~~~sh
export ACL_ROOT_DIR=<path/to/Compute Library>
cmake .. \
-DDNNL_AARCH64_USE_ACL=ON \
-DDNNL_USE_ACL=ON \
<extra build options>
~~~

Expand Down
3 changes: 3 additions & 0 deletions src/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ endif()
if (DNNL_TARGET_ARCH STREQUAL "AARCH64")
add_subdirectory(aarch64)
endif()
if (DNNL_USE_ACL)
add_subdirectory(acl)
endif()
if (DNNL_TARGET_ARCH STREQUAL "PPC64")
add_subdirectory(ppc64)
endif()
Expand Down
8 changes: 0 additions & 8 deletions src/cpu/aarch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,6 @@ file(GLOB XBYAK_AARCH64_FILES

list(REMOVE_ITEM SOURCES ${XBYAK_AARCH64_FILES})

if(NOT DNNL_AARCH64_USE_ACL)
file(GLOB_RECURSE ACL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch]
${CMAKE_CURRENT_SOURCE_DIR}/acl_*.[ch]pp
)
list(REMOVE_ITEM SOURCES ${ACL_FILES})
endif()

# If the runtime is not THREADPOOL remove threadpool_scheduler sources.
if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL")
list(APPEND ACL_THREADPOOL_FILES
Expand Down
6 changes: 3 additions & 3 deletions src/cpu/aarch64/acl_reorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

status_t acl_reorder_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
// Lock here is needed because resource_mapper does not support
Expand All @@ -46,7 +46,7 @@ status_t acl_reorder_fwd_t::execute_forward(const exec_ctx_t &ctx) const {
return status::success;
}

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
} // namespace dnnl
14 changes: 7 additions & 7 deletions src/cpu/aarch64/acl_reorder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#ifndef CPU_AARCH64_ACL_REORDER_HPP
#define CPU_AARCH64_ACL_REORDER_HPP
#ifndef CPU_ACL_REORDER_HPP
#define CPU_ACL_REORDER_HPP

#include "arm_compute/core/Types.h"
#include "common/utils.hpp"
#include "cpu/aarch64/acl_utils.hpp"
#include "cpu/acl/acl_utils.hpp"
#include "cpu/aarch64/cpu_isa_traits.hpp"
#include "cpu/reorder/cpu_reorder_pd.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

struct acl_reorder_obj_t {
arm_compute::NEReorderLayer reorder;
Expand Down Expand Up @@ -131,7 +131,7 @@ struct acl_reorder_fwd_t : public primitive_t {
if (dst_tag == format_tag::BA4b4a || dst_tag == format_tag::Acdb4a
|| dst_tag == format_tag::Ab4a) {
_pd->app_.dst_wf = arm_compute::WeightFormat::OHWIo4;
} else if (mayiuse(sve_256)
} else if (aarch64::mayiuse(aarch64::sve_256)
&& (dst_tag == format_tag::BA8b4a
|| dst_tag == format_tag::Acdb8a
|| dst_tag == format_tag::Ab8a)) {
Expand Down Expand Up @@ -239,9 +239,9 @@ struct acl_reorder_fwd_t : public primitive_t {

}; // acl_reorder_fwd_t

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl

#endif // CPU_AARCH64_ACL_REORDER_HPP
#endif // CPU_ACL_REORDER_HPP
36 changes: 36 additions & 0 deletions src/cpu/acl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#*******************************************************************************
# Copyright 2020-2022 Arm Ltd. and affiliates
# Copyright 2020-2021 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#*******************************************************************************

# Build script for the Arm Compute Library (ACL) backed CPU primitives that
# live in this directory. It gathers the sources, optionally drops the
# threadpool scheduler, and registers an OBJECT library in DNNL_LIB_DEPS.

# Recursively collect every file under this directory whose name ends in
# .c/.h (first pattern) or .cpp/.hpp (second pattern) into SOURCES.
# NOTE: the glob is evaluated at configure time only (no CONFIGURE_DEPENDS),
# so CMake has to be re-run after adding or removing a source file.
file(GLOB_RECURSE SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]
${CMAKE_CURRENT_SOURCE_DIR}/*.[ch]pp
)

# If the runtime is not THREADPOOL remove threadpool_scheduler sources.
# The two scheduler files are appended to ACL_THREADPOOL_FILES and then every
# entry of that list is removed from SOURCES.
# NOTE(review): list(APPEND) extends whatever ACL_THREADPOOL_FILES already
# holds in this scope; presumably it is unset on entry - confirm in the parent
# CMakeLists.txt.
if(NOT DNNL_CPU_RUNTIME STREQUAL "THREADPOOL")
list(APPEND ACL_THREADPOOL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/acl_threadpool_scheduler.hpp
)
list(REMOVE_ITEM SOURCES ${ACL_THREADPOOL_FILES})
endif()

# Compile the remaining sources as an OBJECT library named
# "<DNNL_LIBRARY_NAME>_cpu_acl" (object files only, no archive is produced).
set(OBJ_LIB ${DNNL_LIBRARY_NAME}_cpu_acl)
add_library(${OBJ_LIB} OBJECT ${SOURCES})
# Append this library's object files to the global DNNL_LIB_DEPS property.
# Presumably the top-level library target links everything listed there -
# TODO confirm where DNNL_LIB_DEPS is consumed.
set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS
$<TARGET_OBJECTS:${OBJ_LIB}>)
# Project-defined helper (not visible in this file), called with the object
# library target name as its only argument.
enable_conditional_compilation4(${OBJ_LIB})
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
* limitations under the License.
*******************************************************************************/

#include "cpu/aarch64/acl_batch_normalization.hpp"
#include "cpu/acl/acl_batch_normalization.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

status_t acl_batch_normalization_fwd_t::execute_forward(
const exec_ctx_t &ctx) const {
Expand Down Expand Up @@ -72,7 +72,7 @@ status_t acl_batch_normalization_fwd_t::execute_forward(
return status::success;
}

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
* limitations under the License.
*******************************************************************************/

#ifndef CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP
#define CPU_AARCH64_ACL_BATCH_NORMALIZATION_HPP
#ifndef CPU_ACL_BATCH_NORMALIZATION_HPP
#define CPU_ACL_BATCH_NORMALIZATION_HPP

#include "cpu/cpu_batch_normalization_pd.hpp"

#include "cpu/aarch64/acl_post_ops.hpp"
#include "cpu/aarch64/acl_utils.hpp"
#include "cpu/acl/acl_post_ops.hpp"
#include "cpu/acl/acl_utils.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

struct acl_batch_normalization_obj_t {
arm_compute::NEBatchNormalizationLayer bnorm;
Expand Down Expand Up @@ -274,7 +274,7 @@ struct acl_batch_normalization_fwd_t : public primitive_t {
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
}; // acl_batch_normalization_fwd_t

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
* limitations under the License.
*******************************************************************************/

#include "cpu/aarch64/acl_benchmark_scheduler.hpp"
#include "cpu/acl/acl_benchmark_scheduler.hpp"
#include "common/verbose.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {
using namespace arm_compute;

BenchmarkScheduler::BenchmarkScheduler(IScheduler &real_scheduler)
Expand Down Expand Up @@ -72,7 +72,7 @@ void BenchmarkScheduler::run_workloads(std::vector<Workload> &workloads) {
ARM_COMPUTE_ERROR("Can't be reached");
}

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#ifndef CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
#define CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
#ifndef CPU_ACL_BENCHMARK_SCHEDULER_HPP
#define CPU_ACL_BENCHMARK_SCHEDULER_HPP

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/runtime/IScheduler.h"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {
// BenchmarkScheduler implements ACL's IScheduler interface and acts as an interceptor scheduler
// when DNNL_VERBOSE=profile,profile_externals. It intercepts calls made by the actual scheduler used by ACL and adds
// timers to benchmark execution time of ACL kernels and store kernel information.
Expand Down Expand Up @@ -52,9 +52,9 @@ class BenchmarkScheduler final : public arm_compute::IScheduler {
IScheduler &_real_scheduler;
};

#endif // CPU_AARCH64_ACL_BENCHMARK_SCHEDULER_HPP
#endif // CPU_ACL_BENCHMARK_SCHEDULER_HPP

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
6 changes: 3 additions & 3 deletions src/cpu/aarch64/acl_binary.cpp → src/cpu/acl/acl_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
* limitations under the License.
*******************************************************************************/

#include "cpu/aarch64/acl_binary.hpp"
#include "cpu/acl/acl_binary.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

status_t acl_binary_t::execute_forward(const exec_ctx_t &ctx, const void *src0,
const void *src1, void *dst) const {
Expand Down Expand Up @@ -55,7 +55,7 @@ status_t acl_binary_t::execute_forward(const exec_ctx_t &ctx) const {
return execute_forward(ctx, src0, src1, dst);
}

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
10 changes: 5 additions & 5 deletions src/cpu/aarch64/acl_binary.hpp → src/cpu/acl/acl_binary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
* limitations under the License.
*******************************************************************************/

#ifndef CPU_AARCH64_ACL_BINARY_HPP
#define CPU_AARCH64_ACL_BINARY_HPP
#ifndef CPU_ACL_BINARY_HPP
#define CPU_ACL_BINARY_HPP

#include "cpu/cpu_binary_pd.hpp"

#include "cpu/aarch64/acl_utils.hpp"
#include "cpu/acl/acl_utils.hpp"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

struct acl_binary_obj_t {
std::unique_ptr<arm_compute::IFunction> binary_op;
Expand Down Expand Up @@ -270,7 +270,7 @@ struct acl_binary_t : public primitive_t {

}; // acl_binary_t

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@

#include <cstdint>

#include "cpu/aarch64/acl_convolution_utils.hpp"
#include "cpu/acl/acl_convolution_utils.hpp"
#include "common/convolution_pd.hpp"
#include "common/utils.hpp"
#include "oneapi/dnnl/dnnl.h"

namespace dnnl {
namespace impl {
namespace cpu {
namespace aarch64 {
namespace acl {

namespace acl_convolution_utils {

Expand Down Expand Up @@ -418,7 +418,7 @@ status_t init_conf_depthwise(acl_conv_conf_t &acp, memory_desc_t &src_md,

} // namespace acl_convolution_utils

} // namespace aarch64
} // namespace acl
} // namespace cpu
} // namespace impl
} // namespace dnnl
Loading

0 comments on commit ba12d02

Please sign in to comment.