Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into el_add_act_fuse
Browse files Browse the repository at this point in the history
  • Loading branch information
tsocha committed Mar 10, 2022
2 parents a813e62 + bd4dc3b commit eb6b5c1
Show file tree
Hide file tree
Showing 815 changed files with 38,324 additions and 15,959 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ paddle/fluid/eager/api/generated/*
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/phi/api/backward/backward_api.h
paddle/phi/api/backward/sparse_bw_api.h
paddle/phi/api/include/api.h
paddle/phi/api/include/sparse_api.h
paddle/phi/api/lib/api.cc
paddle/phi/api/lib/dygraph_api.*
paddle/phi/api/lib/backward_api.cc
paddle/phi/api/lib/sparse_api.cc
paddle/phi/api/lib/sparse_bw_api.cc
paddle/phi/extension.h
paddle/phi/include/*
paddle/phi/infermeta/generated.*
Expand Down Expand Up @@ -54,6 +56,7 @@ paddle/infrt/dialect/pd_ops.td
paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td
paddle/infrt/dialect/phi/ir/phi_gpu_kernels.td
tools/infrt/kernels.json
tools/infrt/kernel_signature.json
paddle/infrt/dialect/pd_ops_info.h
.lit_test_times.txt
paddle/infrt/tests/dialect/Output
Expand Down
4 changes: 2 additions & 2 deletions cmake/external/llvm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ endfunction()
function(mlir_add_rewriter td_base)
set(LLVM_TARGET_DEFINITIONS ${td_base}.td)
mlir_tablegen(${td_base}.cpp.inc -gen-rewriters "-I${CMAKE_SOURCE_DIR}/infrt/dialect/pass")
add_public_tablegen_target(${td_base}_IncGen)
add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen)
add_public_tablegen_target(MLIR${td_base}IncGen)
add_dependencies(mlir-headers MLIR${td_base}IncGen)
endfunction()

# Execute the mlir script with infrt-exec program.
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ ENDIF()

if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220228")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220307")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
Expand Down
12 changes: 6 additions & 6 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,11 @@ function(op_library TARGET)
# Define operators that don't need pybind here.
foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
endforeach()

if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
endforeach()

# For the registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
# Note that it's enough to add just one operator to pybind in a *_op.cc file.
Expand Down Expand Up @@ -478,7 +478,7 @@ function(op_library TARGET)
if (${pybind_flag} EQUAL 0)
# NOTE(*): activation uses a macro to register the kernels, so set use_op manually.
if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP(relu);\n")
file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
elseif(${TARGET} STREQUAL "fake_dequantize")
file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
elseif(${TARGET} STREQUAL "fake_quantize")
Expand Down
122 changes: 59 additions & 63 deletions cmake/phi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ function(kernel_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
endif()
endif()
if (WITH_XPU)
Expand Down Expand Up @@ -197,92 +197,88 @@ function(kernel_library TARGET)

# kernel source file level
# level 1: base device kernel
# - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
# level 2: device-independent kernel
# - common_srcs
# level 3: Kernel implemented by reusing device-independent kernel
# - selected_rows_srcs
set(base_device_kernels)
set(device_independent_kernel)
set(high_level_kernels)

# Build the target according to how its sources are organized
if((${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0) AND
(${common_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0))
# If the common_srcs/selected_rows_srcs depends on specific device srcs, build target using this rule.
# 1. Base device kernel compile
if (${cpu_srcs_len} GREATER 0)
cc_library(${TARGET}_cpu SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_cpu)
endif()
if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
nv_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
elseif (WITH_XPU_KP)
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
xpu_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
hip_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
# If there are only specific device srcs, build target using this rule.
elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
list(APPEND base_device_kernels ${TARGET}_gpu)
endif()
if (${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_xpu)
endif()
if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
nv_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_XPU_KP)
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
xpu_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
hip_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
# If the selected_rows_srcs depends on common_srcs, build target using this rule.
elseif (${common_srcs_len} GREATER 0 AND ${selected_rows_srcs_len} GREATER 0)
list(APPEND base_device_kernels ${TARGET}_gpudnn)
endif()
if (${kps_srcs_len} GREATER 0)
# kps_srcs_len can be > 0 only when WITH_XPU_KP is enabled
xpu_library(${TARGET}_kps SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps)
endif()

# 2. Device-independent kernel compile
if (${common_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
nv_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_ROCM)
hip_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
hip_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
xpu_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
else()
cc_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
cc_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
endif()
# If there are only common_srcs or selected_rows_srcs, build target using below rules.
elseif (${common_srcs_len} GREATER 0)
list(APPEND device_independent_kernel ${TARGET}_common)
endif()

# 3. Reusing kernel compile
if (${selected_rows_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_ROCM)
hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_XPU_KP)
xpu_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
else()
cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
endif()
elseif (${selected_rows_srcs_len} GREATER 0)
list(APPEND high_level_kernels ${TARGET}_sr)
endif()

# 4. Unify target compile
list(LENGTH base_device_kernels base_device_kernels_len)
list(LENGTH device_independent_kernel device_independent_kernel_len)
list(LENGTH high_level_kernels high_level_kernels_len)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 OR
${high_level_kernels_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
elseif (WITH_ROCM)
hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
elseif (WITH_XPU_KP)
xpu_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
else()
cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
endif()
else()
set(target_build_flag 0)
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/distributed/collective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup)
if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
endif()
if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
endif()
Loading

1 comment on commit eb6b5c1

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on eb6b5c1 Mar 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #36541 Commit ID: eb6b5c1 contains failed CI.

🔹 Failed: PR-CI-Coverage

Unknown Failed
2022-03-11 03:46:48         Start  161: op_registry_test
2022-03-11 03:46:48 Start 160: program_processing_test
2022-03-11 03:46:48 Start 164: prune_test
2022-03-11 03:46:48 Start 162: dist_multi_trainer_test
2022-03-11 03:46:48 Start 167: op_kernel_type_test
2022-03-11 03:46:48 Start 165: var_type_inference_test
2022-03-11 03:46:48 Start 169: tuple_test
2022-03-11 03:46:48 Start 168: cow_ptr_tests
2022-03-11 03:46:48 Start 172: op_compatible_info_test
2022-03-11 03:46:48 Start 170: inlined_vector_test
2022-03-11 03:46:48 Start 173: save_load_util_test
2022-03-11 03:46:48 Start 178: node_test
2022-03-11 03:46:48 Start 175: scope_guard_test
2022-03-11 03:46:48 Start 180: graph_test
2022-03-11 03:46:48 Start 179: pass_test
2022-03-11 03:46:48 Start 182: graph_to_program_pass_test
2022-03-11 03:46:48 Start 181: graph_helper_test
2022-03-11 03:46:48 Start 184: test_graph_pattern_detector
2022-03-11 03:46:48 Start 183: cost_model_test

Please sign in to comment.