diff --git a/CMakeLists.txt b/CMakeLists.txt
index a0f153a01c6c..dbf9ecd85da9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -299,6 +299,16 @@ if(USE_MKLDNN)
   endif()
 
   function(load_mkldnn)
+    if (MKLDNN_USE_ACL)
+      # C++ 14 is required to build with ACL
+      # ACL_ROOT_DIR also needs to be set
+      set(CMAKE_CXX_STANDARD 14)
+      set(DNNL_AARCH64_USE_ACL ON CACHE INTERNAL "" FORCE)
+    endif()
+    if (MKLDNN_USE_APL)
+      # APL needs to be added to LD_LIBRARY_PATH
+      set(DNNL_BLAS_VENDOR "ARMPL" CACHE INTERNAL "" FORCE)
+    endif()
     set(MKLDNN_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
     set(MKLDNN_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
     set(MKLDNN_ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)
diff --git a/config/distribution/linux_aarch64_cpu.cmake b/config/distribution/linux_aarch64_cpu.cmake
index 98da9c2a60a8..391d34164161 100644
--- a/config/distribution/linux_aarch64_cpu.cmake
+++ b/config/distribution/linux_aarch64_cpu.cmake
@@ -30,17 +30,13 @@ set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
 set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")
 set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
 
-# Uncomment the following line to build MKLDNN with APL support
-# APL can be downloaded from https://developer.arm.com/tools-and-software/server-and-hpc/downloads/arm-performance-libraries
-# APL needs to be added to LD_LIBRARY_PATH
-## set(DNNL_BLAS_VENDOR “ARMPL” CACHE STRING “Build MKLDNN with Arm Performance Libraries as the BLAS library”)
-# Uncomment the following lines to build MKLDNN with ACL support
-# C++ 14 is requried to build with ACL and MKLDNN recommends building ACL from source rather than
-# using the pre-built binaries from https://github.com/ARM-software/ComputeLibrary/releases
-# If pre-built binaries are used anyways, make sure to copy and rename the appropriate binaries
-# folder from /lib/ to /build
+# Pre-built ACL binaries are available from https://github.com/ARM-software/ComputeLibrary/releases
+# Make sure to copy and rename the appropriate binaries folder
+# from /lib/ to /build
 # The resulting acl root folder should look something like:
 # LICENSE README.md arm_compute build examples include lib scripts support utils
-## set(CMAKE_CXX_STANDARD 14)
-## set(ENV{ACL_ROOT_DIR} ~/arm_compute-v21.02-bin-linux)
-## set(DNNL_AARCH64_USE_ACL ON CACHE BOOL “Build MKLDNN with Arm Compute Library integration”)
+set(ENV{ACL_ROOT_DIR} "")
+set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
+# APL can be downloaded from https://developer.arm.com/tools-and-software/server-and-hpc/downloads/arm-performance-libraries
+# Note that APL needs to be added to LD_LIBRARY_PATH
+set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")
diff --git a/config/linux_arm.cmake b/config/linux_arm.cmake
new file mode 100644
index 000000000000..6e6c01d877c4
--- /dev/null
+++ b/config/linux_arm.cmake
@@ -0,0 +1,149 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#-------------------------------------------------------------------------------
+# Template configuration for compiling MXNet
+#
+# If you want to change the configuration, please use the following steps.
+# Assume you are in the root directory of mxnet. First copy this file so that
+# any local changes will be ignored by git
+#
+#   $ cp config/linux_arm.cmake config.cmake
+#
+# Next modify the relevant entries, and then compile by
+#
+#   $ mkdir build; cd build
+#   $ cmake ..
+#   $ cmake --build .
+#
+# Specify `cmake --build . --parallel N` to set the number of parallel compilation jobs.
+# Default is derived from CPUs available.
+#
+#-------------------------------------------------------------------------------
+
+#---------------------------------------------
+# Arm flags
+#---------------------------------------------
+# Set the correct C and CXX flags according to your Arm processor's architecture
+# e.g. "armv8-a"
+set(CFLAGS "-march=armv8-a" CACHE STRING "CFLAGS")
+set(CXXFLAGS "-march=armv8-a" CACHE STRING "CXXFLAGS")
+
+#---------------------------------------------
+# GPU support
+#---------------------------------------------
+set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
+set(USE_CUDNN OFF CACHE BOOL "Build with cuDNN support, if found")
+
+# Target NVIDIA GPU architecture.
+# Valid options are "Auto" for autodetection, "All" for all available
+# architectures or a list of architectures by compute capability number, such as
+# "7.0" or "7.0;7.5" as well as name, such as "Volta" or "Volta;Turing".
+# The value specified here is passed to cmake's CUDA_SELECT_NVCC_ARCH_FLAGS to
+# obtain the compilation flags for nvcc.
+#
+# When compiling on a machine without GPU, autodetection will fail and you
+# should instead specify the target architecture manually to avoid excessive
+# compilation times.
+set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU architecture")
+
+#---------------------------------------------
+# Common libraries
+#---------------------------------------------
+set(USE_BLAS "open" CACHE STRING "BLAS Vendor")
+
+set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
+set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")
+
+set(USE_OPENMP ON CACHE BOOL "Build with OpenMP support")
+
+set(USE_MKL_IF_AVAILABLE ON CACHE BOOL "Use Intel MKL if found")
+set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+
+set(USE_LAPACK ON CACHE BOOL "Build with LAPACK support")
+
+set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
+
+# Integrate MKLDNN with Arm Performance Libraries
+# Note that APL needs to be added to LD_LIBRARY_PATH
+set(MKLDNN_USE_APL OFF CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")
+
+# Integrate MKLDNN with Arm Compute Library
+set(ENV{ACL_ROOT_DIR} "")
+set(MKLDNN_USE_ACL OFF CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
+
+#---------------------
+# Compilers
+#--------------------
+set(CMAKE_GENERATOR "Ninja" CACHE STRING "Build Tool Generator used by CMake")
+
+# Compilers are usually autodetected. Uncomment and modify the next 3 lines to
+# choose manually:
+
+# set(CMAKE_C_COMPILER "" CACHE BOOL "C compiler")
+# set(CMAKE_CXX_COMPILER "" CACHE BOOL "C++ compiler")
+# set(CMAKE_CUDA_COMPILER "" CACHE BOOL "CUDA compiler (nvcc)")
+
+# Uncomment the following line to compile with debug information
+# set(CMAKE_BUILD_TYPE Debug CACHE STRING "CMake build type")
+
+#---------------------------------------------
+# CPU instruction sets: The support is autodetected if turned ON
+#---------------------------------------------
+set(USE_SSE OFF CACHE BOOL "Build with x86 SSE instruction support")
+set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
+
+
+#----------------------------
+# distributed computing
+#----------------------------
+set(USE_DIST_KVSTORE OFF CACHE BOOL "Build with DIST_KVSTORE support")
+
+
+#----------------------------
+# performance settings
+#----------------------------
+set(USE_OPERATOR_TUNING ON CACHE BOOL "Enable auto-tuning of operators")
+set(USE_GPERFTOOLS OFF CACHE BOOL "Build with GPerfTools support")
+set(USE_JEMALLOC OFF CACHE BOOL "Build with Jemalloc support")
+
+
+#----------------------------
+# additional operators
+#----------------------------
+# path to folders containing project-specific operators that you don't want to
+# put in src/operators
+SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
+
+
+#----------------------------
+# other features
+#----------------------------
+# Create C++ interface package
+set(USE_CPP_PACKAGE OFF CACHE BOOL "Build C++ Package")
+
+# Use int64_t type to represent the total number of elements in a tensor
+# Enabling this will cause the performance degradation reported in issue #14496
+# Set to ON to support tensors with more than INT32_MAX (2147483647) elements
+# Note: the size of each dimension is still bounded by INT32_MAX
+set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
+
+# Other GPU features
+set(USE_NCCL OFF CACHE BOOL "Use NVIDIA NCCL with CUDA")
+set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
+set(ENABLE_CUDA_RTC OFF CACHE BOOL "Build with CUDA runtime compilation support")
+set(USE_NVTX OFF CACHE BOOL "Build with NVTX support")
diff --git a/docs/python_docs/environment.yml b/docs/python_docs/environment.yml
index 7856a076889b..6c4a5beebe66 100644
--- a/docs/python_docs/environment.yml
+++ b/docs/python_docs/environment.yml
@@ -26,6 +26,7 @@ dependencies:
 - sphinx==2.4.0
 - matplotlib
 - notebook
+- Jinja2==2.11.3
 - pip:
   - nbconvert==5.6.1
   - nbsphinx==0.4.3
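Usage note (not part of the patch): with these changes applied, enabling the ACL integration might look roughly like the sketch below. The ~/arm_compute-v21.02-bin-linux directory reuses the illustrative path from the comment being removed above, the MXNet checkout location is a placeholder, and the exact lib/ subfolder to copy depends on your target.

  $ cd ~/arm_compute-v21.02-bin-linux
  $ cp -r lib/<target-specific-folder> build        # rename the matching binaries folder from lib/ to build/
  $ cd <mxnet-root>
  $ cp config/linux_arm.cmake config.cmake
  # then edit config.cmake:
  #   set(ENV{ACL_ROOT_DIR} ~/arm_compute-v21.02-bin-linux)
  #   set(MKLDNN_USE_ACL ON CACHE BOOL "Integrate MKLDNN with Arm Compute Library")
  $ mkdir build; cd build
  $ cmake ..
  $ cmake --build .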
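Similarly, a minimal sketch for the APL path; the Arm Performance Libraries install location shown is an assumption, not something set by this patch.

  $ cd <mxnet-root>
  $ cp config/linux_arm.cmake config.cmake
  # then edit config.cmake:
  #   set(MKLDNN_USE_APL ON CACHE BOOL "Integrate MKLDNN with Arm Performance Libraries")
  $ export LD_LIBRARY_PATH=<apl-install-dir>/lib:$LD_LIBRARY_PATH   # APL must be on LD_LIBRARY_PATH
  $ mkdir build; cd build
  $ cmake ..
  $ cmake --build .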