Merge remote-tracking branch 'upstream/develop' into Cleanup__mkldnn_inplace_pass
HulekJakub committed Dec 5, 2022
2 parents aa069cc + e707ee5 commit a849f10
Showing 2,790 changed files with 29,849 additions and 34,836 deletions.
Empty file modified .pre-commit-config.yaml
100755 → 100644
4 changes: 2 additions & 2 deletions cmake/copyfile.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
import sys
import shutil
import glob
import sys


def main():
62 changes: 32 additions & 30 deletions cmake/external/lite.cmake
@@ -35,6 +35,21 @@ if(LITE_WITH_XPU)
endif()
endif()

if(WITH_ARM)
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
endif()

if(LITE_WITH_NNADAPTER)
add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
@@ -50,6 +65,12 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_PROJECT extern_lite)
set(LITE_PREFIX_DIR ${THIRD_PARTY_PATH}/lite)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
set(LITE_BINARY_DIR ${LITE_PREFIX_DIR}/src/extern_lite-build)
set(LITE_SOURCE_DIR ${LITE_PREFIX_DIR}/src/extern_lite)

set(LITE_SHARED_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)

if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG 81ef66554099800c143a0feff6e0a491b3b0d12e)
@@ -61,7 +82,8 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)

# No quotes, so cmake can resolve it as a command with arguments.
if(WITH_ARM)
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target
publish_inference -j)
message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
set(LITE_OPTIONAL_ARGS
-DWITH_MKL=OFF
@@ -110,9 +132,11 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS})
${LITE_OPTIONAL_ARGS}
BUILD_BYPRODUCTS ${LITE_SHARED_LIB})
else()
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target
publish_inference -j)
set(LITE_OPTIONAL_ARGS
-DWITH_MKL=ON
-DLITE_WITH_CUDA=OFF
@@ -157,28 +181,9 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS})
${LITE_OPTIONAL_ARGS}
BUILD_BYPRODUCTS ${LITE_SHARED_LIB})
endif()
ExternalProject_Get_Property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_Property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
set(LITE_SOURCE_DIR ${SOURCE_DIR})

endif()

if(WITH_ARM)
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
endif()

message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
@@ -199,12 +204,9 @@ function(external_lite_libs alias path)
endfunction()

external_lite_libs(
lite_full_static
lite_full_shared
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)
set(LITE_SHARED_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)

if(LITE_WITH_NNADAPTER)
set(LITE_NNADAPTER_LIB
@@ -214,13 +216,13 @@ if(LITE_WITH_NNADAPTER)
lite_nnadapter
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so)
set(LITE_DEPS lite_full_static lite_nnadapter)
set(LITE_DEPS lite_full_shared lite_nnadapter)
set(LITE_NNADAPTER_NPU_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so
)
endif()
else()
set(LITE_DEPS lite_full_static)
set(LITE_DEPS lite_full_shared)
endif()

add_definitions(-DPADDLE_WITH_LITE)
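The lite.cmake changes hoist the WITH_ARM/LITE_OUTPUT_BIN_DIR selection and the LITE_SHARED_LIB path above the ExternalProject_Add call (replacing the later ExternalProject_Get_Property lookups with explicitly computed LITE_SOURCE_DIR/LITE_BINARY_DIR), so the library can be declared as a BUILD_BYPRODUCTS of the external build, and they swap the bare $(MAKE) invocation for a generator-agnostic ${CMAKE_COMMAND} --build command. Declaring byproducts matters mostly for the Ninja generator, which refuses to build targets that depend on files no rule is known to produce. A minimal sketch of the pattern, with a placeholder project name, URL, and paths rather than Paddle's real values:

include(ExternalProject)

set(DEMO_PREFIX ${CMAKE_BINARY_DIR}/third_party/demo)
# Default ExternalProject layout: <prefix>/src/<name> and <prefix>/src/<name>-build.
set(DEMO_SHARED_LIB ${DEMO_PREFIX}/src/extern_demo-build/lib/libdemo.so)

ExternalProject_Add(
  extern_demo
  GIT_REPOSITORY https://example.com/demo.git # placeholder repository
  PREFIX ${DEMO_PREFIX}
  # Generator-agnostic build step, like the new LITE_BUILD_COMMAND above.
  BUILD_COMMAND ${CMAKE_COMMAND} --build . --target all -j
  INSTALL_COMMAND ""
  # Tell the generator which files this step produces.
  BUILD_BYPRODUCTS ${DEMO_SHARED_LIB})

# Imported target for consumers, mirroring what external_lite_libs() does.
add_library(demo_shared SHARED IMPORTED GLOBAL)
set_property(TARGET demo_shared PROPERTY IMPORTED_LOCATION ${DEMO_SHARED_LIB})
add_dependencies(demo_shared extern_demo)

The rename of the imported target from lite_full_static to lite_full_shared (and of LITE_DEPS accordingly) is cosmetic: the alias has always pointed at the shared libpaddle_full_api_shared.so.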
7 changes: 3 additions & 4 deletions cmake/external/rocksdb.cmake
@@ -39,18 +39,17 @@ ExternalProject_Add(
-DWITH_GFLAGS=OFF
-DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
# BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a
INSTALL_COMMAND
mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include
${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
BYPRODUCTS ${ROCKSDB_LIBRARIES})

add_dependencies(extern_rocksdb snappy)
BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})

add_library(rocksdb STATIC IMPORTED GLOBAL)

add_dependencies(extern_rocksdb snappy)
set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES})
add_dependencies(rocksdb extern_rocksdb)

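In rocksdb.cmake the same idea applies: the commented-out byproduct hint is dropped and the non-standard BYPRODUCTS keyword is replaced by BUILD_BYPRODUCTS, the option ExternalProject_Add actually understands, so ${ROCKSDB_LIBRARIES} is recorded as an output of the extern_rocksdb build (again primarily for Ninja's benefit). The add_dependencies(extern_rocksdb snappy) line merely moves below the add_library(rocksdb STATIC IMPORTED GLOBAL) declaration; there is no functional change there.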
29 changes: 21 additions & 8 deletions cmake/phi.cmake
@@ -111,19 +111,31 @@ function(kernel_declare TARGET_LIST)
endfunction()

function(append_op_util_declare TARGET)
file(READ ${TARGET} target_content)
string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
util_registrar "${target_content}")
if(NOT ${util_registrar} EQUAL "")
string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
util_declare "${util_registrar}")
string(APPEND util_declare ");\n")
file(APPEND ${op_utils_header} "${util_declare}")
endif()
endfunction()

function(append_op_kernel_map_declare TARGET)
file(READ ${TARGET} target_content)
string(
REGEX
MATCH
"(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
util_registrar
"(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
kernel_mapping_registrar
"${target_content}")
string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
util_declare "${util_registrar}")
string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
util_declare "${util_declare}")
string(APPEND util_declare ");\n")
file(APPEND ${op_utils_header} "${util_declare}")
if(NOT ${kernel_mapping_registrar} EQUAL "")
string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
kernel_mapping_declare "${kernel_mapping_registrar}")
string(APPEND kernel_mapping_declare ");\n")
file(APPEND ${op_utils_header} "${kernel_mapping_declare}")
endif()
endfunction()

function(register_op_utils TARGET_NAME)
@@ -137,6 +149,7 @@ function(register_op_utils TARGET_NAME)
file(GLOB SIGNATURES "${PADDLE_SOURCE_DIR}/paddle/phi/ops/compat/*_sig.cc")
foreach(target ${SIGNATURES})
append_op_util_declare(${target})
append_op_kernel_map_declare(${target})
list(APPEND utils_srcs ${target})
endforeach()

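append_op_util_declare previously matched both registration macros with a single alternation regex and unconditionally appended the rewritten declaration to ${op_utils_header}. It is now split: append_op_util_declare handles only PD_REGISTER_ARG_MAPPING_FN, the new append_op_kernel_map_declare handles PD_REGISTER_BASE_KERNEL_NAME (its regex now also captures the second, comma-separated argument), and each appends its PD_DECLARE_* line only when the match is non-empty. register_op_utils calls both helpers for every *_sig.cc file. A small self-contained sketch of what each regex extracts, runnable with cmake -P; the op and kernel names are invented for illustration, only the macro names and regexes come from the code above:

# regex_demo.cmake  (run with: cmake -P regex_demo.cmake)
# Hypothetical *_sig.cc content.
string(CONCAT target_content
       "PD_REGISTER_BASE_KERNEL_NAME(my_op_grad, my_base_kernel);\n"
       "PD_REGISTER_ARG_MAPPING_FN(my_op, MyOpArgMapping);\n")

string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
       util_registrar "${target_content}")
string(
  REGEX
    MATCH
    "(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
    kernel_mapping_registrar
    "${target_content}")

message(STATUS "arg-mapping match:  ${util_registrar}")
# prints: PD_REGISTER_ARG_MAPPING_FN(my_op
message(STATUS "base-kernel match:  ${kernel_mapping_registrar}")
# prints: PD_REGISTER_BASE_KERNEL_NAME(my_op_grad, my_base_kernel

Each helper then rewrites REGISTER to DECLARE via string(REPLACE ...) and appends the result plus ");\n" to the generated op-utils header.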
3 changes: 3 additions & 0 deletions paddle/CMakeLists.txt
@@ -72,6 +72,9 @@ if(${len} GREATER_EQUAL 1)
target_link_libraries(${test_name}
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}>")
endif()
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
target_link_libraries(${test_name} ${PYTHON_LIBRARIES})
endif()
if(WITH_XPU)
target_link_libraries(${test_name} xpulib)
endif()
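The new guard links ${PYTHON_LIBRARIES} into each test target except in Python-free inference builds (ON_INFER with WITH_PYTHON off). By De Morgan's law the committed condition is equivalent to the arguably more direct form below; this is only a sketch of the same logic, the actual change keeps the negated form:

if(WITH_PYTHON OR NOT ON_INFER)
  target_link_libraries(${test_name} ${PYTHON_LIBRARIES})
endif()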
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/CMakeLists.txt
@@ -21,7 +21,7 @@ endif()
if(WITH_NCCL OR WITH_RCCL)
cc_library(
processgroup_nccl
SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc
SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc static_check.cc
DEPS processgroup
processgroup_stream
place
104 changes: 0 additions & 104 deletions paddle/fluid/distributed/collective/NCCLTools.cc
@@ -44,109 +44,5 @@ std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) {
return oss.str();
}

void StaticCheckTensor(const phi::DenseTensor& tensor,
int rank,
int world_size) {
// place check
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(tensor.place()),
true,
platform::errors::InvalidArgument("Tensor should be in GPU place."));
// rank check
PADDLE_ENFORCE_GE(rank,
0,
platform::errors::InvalidArgument(
"Rank should be greater than or equal to 0."));
PADDLE_ENFORCE_LT(
rank,
world_size,
platform::errors::InvalidArgument("Rank is out of the process group."));
}

// static check for collective
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size,
int out_size_factor,
int in_size_factor) {
// place check
PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_tensor.place()),
true,
platform::errors::InvalidArgument(
"Output tensor should be in GPU place."));
PADDLE_ENFORCE_EQ(platform::is_gpu_place(in_tensor.place()),
true,
platform::errors::InvalidArgument(
"Input tensor should be in GPU place."));
// rank check
PADDLE_ENFORCE_GE(rank,
0,
platform::errors::InvalidArgument(
"Rank should be greater than or equal to 0."));
PADDLE_ENFORCE_LT(
rank,
world_size,
platform::errors::InvalidArgument("Rank is out of the process group."));
// shape check
int64_t out_size = out_tensor.numel();
PADDLE_ENFORCE_GT(out_size,
0,
platform::errors::InvalidArgument(
"Size of output tensor should be greater than 0."));
int64_t in_size = in_tensor.numel();
PADDLE_ENFORCE_GT(in_size,
0,
platform::errors::InvalidArgument(
"Size of input tensor should be greater than 0."));
PADDLE_ENFORCE_EQ(
out_size * out_size_factor,
in_size * in_size_factor,
platform::errors::InvalidArgument(
"Input and output tensors should have matching sizes."));
// dtype check
PADDLE_ENFORCE_EQ(
out_tensor.dtype(),
in_tensor.dtype(),
platform::errors::InvalidArgument(
"Input and output tensors should have the same data type."));
}

void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ 1,
/*in_size_factor*/ 1);
}

void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ world_size,
/*in_size_factor*/ 1);
}

void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ 1,
/*in_size_factor*/ world_size);
}

} // namespace distributed
} // namespace paddle
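The StaticCheckTensor* helpers removed here (place, rank, size, and dtype validation for point-to-point and collective tensors), together with their declarations in NCCLTools.h below, are not simply dropped: the processgroup_nccl library earlier in this commit gains a new static_check.cc source, so these checks appear to have been relocated there rather than deleted outright.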
27 changes: 0 additions & 27 deletions paddle/fluid/distributed/collective/NCCLTools.h
@@ -63,32 +63,5 @@ ncclRedOp_t ToNCCLRedType(ReduceOp reduction);

std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID);

// static check for p2p
void StaticCheckTensor(const phi::DenseTensor& tensor,
int rank,
int world_size);

// static check for collective
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size,
int out_size_factor,
int in_size_factor);

void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);

void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);

void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);
} // namespace distributed
} // namespace paddle
(Diffs for the remaining changed files are not shown here.)
