This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[v1.x] Revise MKLDNN Builds on Arm and Add a CMake Template for Arm #20266

Merged: 3 commits, merged on May 14, 2021
10 changes: 10 additions & 0 deletions CMakeLists.txt
@@ -299,6 +299,16 @@ if(USE_MKLDNN)
endif()

function(load_mkldnn)
if (MKLDNN_USE_ACL)
# C++ 14 is required to build with ACL
# ACL_ROOT_DIR also needs to be set
set(CMAKE_CXX_STANDARD 14)
set(DNNL_AARCH64_USE_ACL ON CACHE INTERNAL "" FORCE)
endif()
if (MKLDNN_USE_APL)
# APL needs to be added to LD_LIBRARY_PATH
set(DNNL_BLAS_VENDOR "ARMPL" CACHE INTERNAL "" FORCE)
endif()
set(MKLDNN_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
set(MKLDNN_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
set(MKLDNN_ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)
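For context, a build that exercises the two new options might be driven as in the sketch below. This is an illustrative sequence only, not part of the change: the install paths are placeholders, and the switches are simply the cache variables consumed by the `load_mkldnn` logic above.

    # ACL build: oneDNN locates the Compute Library through the ACL_ROOT_DIR environment variable
    export ACL_ROOT_DIR=$HOME/ComputeLibrary          # example location of an ACL tree
    cmake -DUSE_MKLDNN=ON -DMKLDNN_USE_ACL=ON ..
    cmake --build .

    # APL build: the Arm Performance Libraries must be on LD_LIBRARY_PATH at build and run time
    export LD_LIBRARY_PATH=/opt/arm/armpl/lib:$LD_LIBRARY_PATH   # example install location
    cmake -DUSE_MKLDNN=ON -DMKLDNN_USE_APL=ON ..
    cmake --build .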
20 changes: 8 additions & 12 deletions config/distribution/linux_aarch64_cpu.cmake
@@ -30,17 +30,13 @@ set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")

set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
# Uncomment the following line to build MKLDNN with APL support
# APL can be downloaded from https://developer.arm.com/tools-and-software/server-and-hpc/downloads/arm-performance-libraries
# APL needs to be added to LD_LIBRARY_PATH
## set(DNNL_BLAS_VENDOR “ARMPL” CACHE STRING “Build MKLDNN with Arm Performance Libraries as the BLAS library”)
# Uncomment the following lines to build MKLDNN with ACL support
# C++ 14 is required to build with ACL and MKLDNN recommends building ACL from source rather than
# using the pre-built binaries from https://github.com/ARM-software/ComputeLibrary/releases
# If pre-built binaries are used anyway, make sure to copy and rename the appropriate binaries
# folder from <acl_root>/lib/<binaries_folder> to <acl_root>/build
# Pre-built binaries are available from https://github.com/ARM-software/ComputeLibrary/releases
# Make sure to copy and rename the appropriate binaries folder
# from <acl_root>/lib/<binaries_folder_for_your_arch> to <acl_root>/build
# The resulting acl root folder should look something like:
# LICENSE README.md arm_compute build examples include lib scripts support utils
## set(CMAKE_CXX_STANDARD 14)
## set(ENV{ACL_ROOT_DIR} ~/arm_compute-v21.02-bin-linux)
## set(DNNL_AARCH64_USE_ACL ON CACHE BOOL “Build MKLDNN with Arm Compute Library integration”)
set(ENV{ACL_ROOT_DIR} "")
set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
# APL can be downloaded from https://developer.arm.com/tools-and-software/server-and-hpc/downloads/arm-performance-libraries
# Note that APL needs to be added to LD_LIBRARY_PATH
set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")
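With the revised file, enabling either backend means flipping the new cache options rather than uncommenting blocks. As a hypothetical ACL-enabled copy of this config (the ACL path is only an example, mirroring the one the old comments suggested):

    set(ENV{ACL_ROOT_DIR} "$ENV{HOME}/arm_compute-v21.02-bin-linux")  # point at your ACL tree
    set(MKLDNN_USE_ACL ON CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
    # For APL instead, keep MKLDNN_USE_ACL OFF, add the APL lib directory to LD_LIBRARY_PATH, and set:
    # set(MKLDNN_USE_APL ON CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")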
149 changes: 149 additions & 0 deletions config/linux_arm.cmake
@@ -0,0 +1,149 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#-------------------------------------------------------------------------------
# Template configuration for compiling MXNet
#
# If you want to change the configuration, please use the following steps.
# Assume you are on the root directory of mxnet. First copy this file so that
# any local changes will be ignored by git
#
# $ cp config/linux_arm.cmake config.cmake
#
# Next modify the according entries, and then compile by
#
# $ mkdir build; cd build
# $ cmake ..
# $ cmake --build .
#
# Specify `cmake --build . --parallel N` to set the number of parallel compilation jobs.
# Default is derived from CPUs available.
#
#-------------------------------------------------------------------------------

#---------------------------------------------
# Arm flags
#---------------------------------------------
# Set the correct C and CXX flags according to your Arm processor's architecture
# e.g. "armv8-a"
set(CFLAGS "-march=armv8-a" CACHE STRING "CFLAGS")
set(CXXFLAGS "-march=armv8-a" CACHE STRING "CXXFLAGS")
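# As an illustrative example only (verify the flag against your compiler and CPU first),
# a Neoverse N1 machine such as AWS Graviton2 could target a more specific architecture:
# set(CFLAGS "-march=armv8.2-a+fp16+dotprod" CACHE STRING "CFLAGS")
# set(CXXFLAGS "-march=armv8.2-a+fp16+dotprod" CACHE STRING "CXXFLAGS")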

#---------------------------------------------
# GPU support
#---------------------------------------------
set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
set(USE_CUDNN OFF CACHE BOOL "Build with cudnn support, if found")

# Target NVIDIA GPU architecture.
# Valid options are "Auto" for autodetection, "All" for all available
# architectures or a list of architectures by compute capability number, such as
# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
# obtain the compilation flags for nvcc.
#
# When compiling on a machine without GPU, autodetection will fail and you
# should instead specify the target architecture manually to avoid excessive
# compilation times.
set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU architecture")

#---------------------------------------------
# Common libraries
#---------------------------------------------
set(USE_BLAS "open" CACHE STRING "BLAS Vendor")

set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")

set(USE_OPENMP ON CACHE BOOL "Build with OpenMP support")

set(USE_MKL_IF_AVAILABLE ON CACHE BOOL "Use Intel MKL if found")
set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")

set(USE_LAPACK ON CACHE BOOL "Build with lapack support")

set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")

# Integrate MKLDNN with Arm Performance Libraries
# Note that APL needs to be added to LD_LIBRARY_PATH
set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")

# Integrate MKLDNN with Arm Compute Library
set(ENV{ACL_ROOT_DIR} "")
set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")

#---------------------
# Compilers
#--------------------
set(CMAKE_GENERATOR "Ninja" CACHE STRING "Build Tool Generator used by CMake")

# Compilers are usually autodetected. Uncomment and modify the next 3 lines to
# choose manually:

# set(CMAKE_C_COMPILER "" CACHE BOOL "C compiler")
# set(CMAKE_CXX_COMPILER "" CACHE BOOL "C++ compiler")
# set(CMAKE_CUDA_COMPILER "" CACHE BOOL "Cuda compiler (nvcc)")

# Uncomment the following line to compile with debug information
# set(CMAKE_BUILD_TYPE Debug CACHE STRING "CMake build type")

#---------------------------------------------
# CPU instruction sets: The support is autodetected if turned ON
#---------------------------------------------
set(USE_SSE OFF CACHE BOOL "Build with x86 SSE instruction support")
set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")


#----------------------------
# distributed computing
#----------------------------
set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")


#----------------------------
# performance settings
#----------------------------
set(USE_OPERATOR_TUNING ON CACHE BOOL "Enable auto-tuning of operators")
set(USE_GPERFTOOLS OFF CACHE BOOL "Build with GPerfTools support")
set(USE_JEMALLOC OFF CACHE BOOL "Build with Jemalloc support")


#----------------------------
# additional operators
#----------------------------
# path to folders containing projects specific operators that you don't want to
# put in src/operators
SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")


#----------------------------
# other features
#----------------------------
# Create C++ interface package
set(USE_CPP_PACKAGE OFF CACHE BOOL "Build C++ Package")

# Use int64_t type to represent the total number of elements in a tensor
# This will cause performance degradation reported in issue #14496
# Set to ON for large tensors with total size greater than INT32_MAX, i.e. 2147483647
# Note: the size of each dimension is still bounded by INT32_MAX
set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")

# Other GPU features
set(USE_NCCL OFF CACHE BOOL "Use NVIDIA NCCL with CUDA")
set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC OFF CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX OFF CACHE BOOL "Build with NVTX support")
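An end-to-end use of the new template might look like the following sketch, run from the MXNet source root (the parallel job count is arbitrary):

    cp config/linux_arm.cmake config.cmake
    # edit config.cmake: adjust -march and, if desired, turn on MKLDNN_USE_ACL or MKLDNN_USE_APL
    mkdir -p build && cd build
    cmake ..
    cmake --build . --parallel 8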
1 change: 1 addition & 0 deletions docs/python_docs/environment.yml
@@ -26,6 +26,7 @@ dependencies:
- sphinx==2.4.0
- matplotlib
- notebook
- Jinja2==2.11.3
- pip:
- nbconvert==5.6.1
- nbsphinx==0.4.3