Skip to content

Commit

Permalink
Merge branch 'develop' into op2func_refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
chenwhql authored Oct 20, 2021
2 parents a49fd44 + 3f2d6a3 commit 373f9c1
Show file tree
Hide file tree
Showing 367 changed files with 20,889 additions and 3,280 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_CINN "Compile PaddlePaddle with CINN" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_RCCL "Compile PaddlePaddle with RCCL support" ON)
option(WITH_XPU_BKCL "Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL" OFF)
Expand Down Expand Up @@ -299,6 +300,10 @@ if(WITH_GPU)
endif()
endif()

if(WITH_CINN)
include(cinn)
endif()

if(WITH_ROCM)
include(hip)
include(miopen) # set miopen libraries, must before configure
Expand Down
112 changes: 112 additions & 0 deletions cmake/cinn.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if (NOT WITH_CINN)
return()
endif()

# TODO(zhhsplendid): CINN has lots of warnings during early development.
# They will be treated as errors under paddle. We set no-error now and we will
# clean the code in the future.
add_definitions(-w)

######################################
# Build CINN from Git External Project
######################################
include(ExternalProject)
set(CINN_SOURCE_DIR ${THIRD_PARTY_PATH}/CINN)
# TODO(zhhsplendid): Modify git tag after we have release tag
set(CINN_GIT_TAG 3f004bfa3ed273ecf1de8e7b946433038c79b84f)
set(CINN_OPTIONAL_ARGS -DWITH_CUDA=${WITH_GPU} -DWITH_CUDNN=${WITH_GPU} -DPUBLISH_LIBS=ON)
set(CINN_BUILD_COMMAND $(MAKE) cinncore -j && $(MAKE) cinnapi -j)
ExternalProject_Add(
external_cinn
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/CINN.git"
GIT_TAG ${CINN_GIT_TAG}
PREFIX ${CINN_SOURCE_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${CINN_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS ${CINN_OPTIONAL_ARGS})



ExternalProject_Get_property(external_cinn BINARY_DIR)
ExternalProject_Get_property(external_cinn SOURCE_DIR)
set(CINN_BINARY_DIR ${BINARY_DIR})
set(CINN_SOURCE_DIR ${SOURCE_DIR})

message(STATUS "CINN BINARY_DIR: ${CINN_BINARY_DIR}")
message(STATUS "CINN SOURCE_DIR: ${CINN_SOURCE_DIR}")


#########################
# Add CINN's dependencies
#########################

# Add absl
set(ABSL_LIB_NAMES
hash
wyhash
city
strings
throw_delegate
bad_any_cast_impl
bad_optional_access
bad_variant_access
raw_hash_set
)
set(ABSL_LIB_DIR "${CINN_BINARY_DIR}/dist/third_party/absl/lib")
set(ABSL_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/absl/include")
add_library(absl STATIC IMPORTED GLOBAL)
set_target_properties(absl PROPERTIES IMPORTED_LOCATION ${ABSL_LIB_DIR}/libabsl_base.a)
foreach(lib_name ${ABSL_LIB_NAMES})
target_link_libraries(absl INTERFACE ${ABSL_LIB_DIR}/libabsl_${lib_name}.a)
endforeach()
include_directories(${ABSL_INCLUDE_DIR})

# Add isl
set(ISL_LIB_DIR "${CINN_BINARY_DIR}/dist/third_party/isl/lib")
set(ISL_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/isl/include")
add_library(isl STATIC IMPORTED GLOBAL)
set_target_properties(isl PROPERTIES IMPORTED_LOCATION ${ISL_LIB_DIR}/libisl.a)
include_directories(${ISL_INCLUDE_DIR})

# Add LLVM
set(LLVM_LIB_NAMES
ExecutionEngine
)
set(LLVM_LIB_DIR "${CINN_BINARY_DIR}/dist/third_party/llvm/lib")
set(LLVM_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/llvm/include")
add_library(llvm STATIC IMPORTED GLOBAL)
set_target_properties(llvm PROPERTIES IMPORTED_LOCATION ${LLVM_LIB_DIR}/libLLVMCore.a)
foreach(lib_name ${LLVM_LIB_NAMES})
target_link_libraries(llvm INTERFACE ${LLVM_LIB_DIR}/libLLVM${lib_name}.a)
endforeach()
include_directories(${LLVM_INCLUDE_DIR})

######################################################
# Put external_cinn and dependencies together as a lib
######################################################

set(CINN_LIB_NAME "libcinnapi.so")
set(CINN_LIB_LOCATION "${CINN_BINARY_DIR}/dist/cinn/lib")
set(CINN_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/cinn/include")

add_library(cinn SHARED IMPORTED GLOBAL)
set_target_properties(cinn PROPERTIES IMPORTED_LOCATION "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
include_directories(${CINN_INCLUDE_DIR})
add_dependencies(cinn external_cinn absl isl llvm glog gflag)

34 changes: 34 additions & 0 deletions cmake/external/ascend.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,39 @@ if(WITH_ASCEND_CL)
ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib})
add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler)
endif()

if (WITH_ASCEND_CL)
macro(find_ascend_toolkit_version ascend_toolkit_version_info)
file(READ ${ascend_toolkit_version_info} ASCEND_TOOLKIT_VERSION_CONTENTS)
string(REGEX MATCH "version=([0-9]+\.[0-9]+\.[0-9]+\.[a-z]*[0-9]*)" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION_CONTENTS}")
string(REGEX REPLACE "version=([0-9]+\.[0-9]+\.[0-9]+\.[a-z]*[0-9]*)" "\\1" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION}")
string(REGEX REPLACE "[a-z|\.]" "" CANN_VERSION ${ASCEND_TOOLKIT_VERSION})
add_definitions("-DCANN_VERSION_CODE=${CANN_VERSION}")
if(NOT ASCEND_TOOLKIT_VERSION)
set(ASCEND_TOOLKIT_VERSION "???")
else()
message(STATUS "Current Ascend Toolkit version is ${ASCEND_TOOLKIT_VERSION}")
endif()
endmacro()

macro(find_ascend_driver_version ascend_driver_version_info)
file(READ ${ascend_driver_version_info} ASCEND_DRIVER_VERSION_CONTENTS)
string(REGEX MATCH "Version=([0-9]+\.[0-9]+\.[0-9]+)" ASCEND_DRIVER_VERSION "${ASCEND_DRIVER_VERSION_CONTENTS}")
string(REGEX REPLACE "Version=([0-9]+\.[0-9]+\.[0-9]+)" "\\1" ASCEND_DRIVER_VERSION "${ASCEND_DRIVER_VERSION}")
if(NOT ASCEND_DRIVER_VERSION)
set(ASCEND_DRIVER_VERSION "???")
else()
message(STATUS "Current Ascend Driver version is ${ASCEND_DRIVER_VERSION}")
endif()
endmacro()

if (WITH_ARM)
set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/arm64-linux)
else()
set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/x86_64-linux)
endif()

find_ascend_toolkit_version(${ASCEND_TOOLKIT_DIR}/ascend_toolkit_install.info)
find_ascend_driver_version(${ASCEND_DIR}/driver/version.info)
endif()
2 changes: 1 addition & 1 deletion cmake/external/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$<SEMICOLON>/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
Expand Down
51 changes: 51 additions & 0 deletions cmake/external/utf8proc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(UTF8PROC_PREFIX_DIR ${THIRD_PARTY_PATH}/utf8proc)
SET(UTF8PROC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/utf8proc)
# As we add extra features for utf8proc, we use the non-official repo
SET(UTF8PROC_REPOSITORY ${GIT_URL}/JuliaStrings/utf8proc.git)
SET(UTF8PROC_TAG v2.6.1)

IF(WIN32)
SET(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/utf8proc_static.lib")
add_definitions(-DUTF8PROC_STATIC)
ELSE(WIN32)
SET(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/libutf8proc.a")
ENDIF(WIN32)

INCLUDE_DIRECTORIES(${UTF8PROC_INSTALL_DIR}/include)

ExternalProject_Add(
extern_utf8proc
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${UTF8PROC_REPOSITORY}
GIT_TAG ${UTF8PROC_TAG}
PREFIX ${UTF8PROC_PREFIX_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DBUILD_SHARED=ON
-DBUILD_STATIC=ON
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_INSTALL_PREFIX:PATH=${UTF8PROC_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
BUILD_BYPRODUCTS ${UTF8PROC_LIBRARIES}
)

ADD_LIBRARY(utf8proc STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET utf8proc PROPERTY IMPORTED_LOCATION ${UTF8PROC_LIBRARIES})
ADD_DEPENDENCIES(utf8proc extern_utf8proc)
14 changes: 13 additions & 1 deletion cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ function(copy_part_of_thrid_party TARGET DST)
SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)

set(dst_dir "${DST}/third_party/install/utf8proc")
copy(${TARGET}
SRCS ${UTF8PROC_INSTALL_DIR}/include ${UTF8PROC_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)

if (WITH_CRYPTO)
set(dst_dir "${DST}/third_party/install/cryptopp")
copy(${TARGET}
Expand Down Expand Up @@ -353,7 +358,9 @@ function(version version_file)
"WITH_MKL: ${WITH_MKL}\n"
"WITH_MKLDNN: ${WITH_MKLDNN}\n"
"WITH_GPU: ${WITH_GPU}\n"
"WITH_ROCM: ${WITH_ROCM}\n")
"WITH_ROCM: ${WITH_ROCM}\n"
"WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n"
"WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n")
if(WITH_GPU)
file(APPEND ${version_file}
"CUDA version: ${CUDA_VERSION}\n"
Expand All @@ -364,6 +371,11 @@ function(version version_file)
"HIP version: ${HIP_VERSION}\n"
"MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n")
endif()
if(WITH_ASCEND_CL)
file(APPEND ${version_file}
"Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n"
"Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n")
endif()
file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
if(TENSORRT_FOUND)
file(APPEND ${version_file}
Expand Down
2 changes: 0 additions & 2 deletions cmake/miopen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ find_path(MIOPEN_INCLUDE_DIR "miopen/miopen.h"
NO_DEFAULT_PATH
)

get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)

find_library(MIOPEN_LIBRARY NAMES "libMIOpen.so"
PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/lib ${MIOPEN_ROOT}/lib64 ${__libpath_hist}
$ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/lib $ENV{MIOPEN_ROOT}/lib64
Expand Down
3 changes: 2 additions & 1 deletion cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ function(op_library TARGET)
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
list(REMOVE_ITEM hip_srcs "svd_op.cu")
list(REMOVE_ITEM hip_srcs "qr_op.cu")
list(REMOVE_ITEM hip_srcs "eigh_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
Expand Down Expand Up @@ -216,7 +217,7 @@ function(op_library TARGET)
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
"sync_batch_norm_op" "sparse_attention_op" "dgc_op" "fused_fc_elementwise_layernorm_op"
"skip_layernorm_op" "multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op" "fusion_lstm_op"
"fused_bn_add_activation_op")
"fused_bn_add_activation_op" "resnet_unit_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
Expand Down
4 changes: 4 additions & 0 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ include(external/threadpool)# download threadpool
include(external/dlpack) # download dlpack
include(external/xxhash) # download, build, install xxhash
include(external/warpctc) # download, build, install warpctc
include(external/utf8proc) # download, build, install utf8proc

list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boost extern_xxhash)
list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool extern_utf8proc)
include(external/lapack) # download, build, install lapack

list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boost extern_xxhash)
Expand Down
Binary file removed log
Binary file not shown.
18 changes: 3 additions & 15 deletions paddle/fluid/distributed/service/brpc_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,23 +138,11 @@ void SerializeSelectedRows(framework::Variable* var,
var_data->clear();
var_data->resize(rows->size() * sizeof(int64_t));
char* data_ptr = const_cast<char*>(var_data->data());

if (platform::is_cpu_place(tensor->place())) {
memcpy(data_ptr, &(*rows)[0], rows->size() * sizeof(int64_t));
} else {
#ifdef PADDLE_WITH_CUDA
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(platform::CPUPlace(), data_ptr,
BOOST_GET_CONST(platform::CUDAPlace, tensor->place()),
&(*rows)[0], rows->size() * sizeof(int64_t), stream);
#endif
}
memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t));
var_msg->set_data_type(static_cast<VarMsg::Type>(tensor->type()));
for (auto& dim : framework::vectorize(tensor->dims())) {
var_msg->add_dims(dim);
}

// IO Buffer
if (platform::is_cpu_place(tensor->place())) {
auto data_len = tensor->numel() * framework::SizeOfType(tensor->type());
Expand Down Expand Up @@ -273,8 +261,8 @@ void DeserializeSelectedRows(framework::Variable* var, const VarMsg& msg,
auto* slr = var->GetMutable<framework::SelectedRows>();
framework::Tensor* tensor = slr->mutable_value();
slr->set_height(msg.slr_height());
std::vector<int64_t> tmp_rows(msg.slr_height());
memcpy(&tmp_rows[0], msg.data().data(), msg.slr_height() * sizeof(int64_t));
std::vector<int64_t> tmp_rows(msg.dims()[0]);
memcpy(tmp_rows.data(), msg.data().data(), msg.dims()[0] * sizeof(int64_t));
slr->set_rows(tmp_rows);
std::vector<int> vec_dim;
for (auto& x : msg.dims()) {
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ proto_library(data_feed_proto SRCS data_feed.proto)
proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
data_feed_proto)

cc_library(string_array SRCS string_array.cc DEPS utf8proc)

cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
if(WITH_GPU)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ set(IR_PASS_DEPS graph_viz_pass multi_devices_graph_pass
coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass
sync_batch_norm_pass runtime_context_cache_pass graph_to_program_pass
paddle_to_cinn_pass fix_op_run_order_pass)
fix_op_run_order_pass build_cinn_pass)
if(NOT APPLE AND NOT WIN32 AND (WITH_GPU OR WITH_ROCM))
set(IR_PASS_DEPS ${IR_PASS_DEPS} fusion_group_pass)
endif()
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/details/build_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {

// Note: This pass is used to enable cinn.
if (FLAGS_use_cinn) {
AppendPass("paddle_to_cinn_pass");
AppendPass("build_cinn_pass");
}
SetCollectiveContext();
}
Expand Down Expand Up @@ -486,6 +486,7 @@ USE_PASS(fuse_momentum_op_pass);
USE_PASS(fuse_all_reduce_op_pass);
USE_PASS(runtime_context_cache_pass);
USE_PASS(add_reader_dependency_pass);
USE_PASS(build_cinn_pass);
#ifdef PADDLE_WITH_MKLDNN
USE_PASS(mkldnn_placement_pass);
#endif
Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/framework/distributed_strategy.proto
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ message GradientScaleConfig {
// Else if sum, the gradient will accumulated among multiple
// devices.
optional string scale_strategy = 1 [ default = 'avg' ];
// The avg_loss flag is used to determine the position of average
// If scale_gradient is False, it will avg the loss@Grad before grad merge.
// Otherwise, it will do grad merge firstly, then avg the grad after merging.
optional bool scale_gradient = 2 [ default = false ];
}

message AsyncConfig {
Expand Down
Loading

0 comments on commit 373f9c1

Please sign in to comment.