
Commit ae0399a

Merge remote-tracking branch 'upstream/develop' into Cleanup__mkldnn_inplace_pass

HulekJakub committed Dec 6, 2022
2 parents: a849f10 + c838c1e
Showing 281 changed files with 6,697 additions and 6,196 deletions.
10 changes: 8 additions & 2 deletions .flake8
@@ -17,13 +17,13 @@ exclude =
 ignore =
 # E, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
 E203,
-E401,E402,
+E402,
 E501,
 E721,E722,E731,E741,

 # F, see https://flake8.pycqa.org/en/latest/user/error-codes.html
 F405,
-F811,F841,
+F841,

 # W, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
 W503
@@ -37,3 +37,9 @@ per-file-ignores =
 .cmake-format.py: F821
 python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py: F821
 python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py: F821
+# These files will be fixed in the future
+python/paddle/fluid/tests/unittests/fft/test_fft_with_static_graph.py: F811
+python/paddle/fluid/tests/unittests/test_activation_nn_grad.py: F811
+python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py: F811
+python/paddle/fluid/tests/unittests/test_matmul_v2_op.py: F811
+python/paddle/fluid/tests/unittests/test_rrelu_op.py: F811
2 changes: 1 addition & 1 deletion cmake/external/cutlass.cmake
@@ -17,7 +17,7 @@ include(ExternalProject)
 set(CUTLASS_PREFIX_DIR ${THIRD_PARTY_PATH}/cutlass)

 set(CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git)
-set(CUTLASS_TAG v2.9.1)
+set(CUTLASS_TAG v2.10.0)

 include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/")
 include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/include/")
4 changes: 2 additions & 2 deletions cmake/external/dgc.cmake
@@ -23,14 +23,14 @@ set(DGC_INCLUDE_DIR
 set(DGC_LIBRARIES
 "${DGC_INSTALL_DIR}/lib/libdgc.a"
 CACHE FILEPATH "dgc library." FORCE)
-set(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_f66ef73.tgz")
+set(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_7369ff.tgz")
 include_directories(${DGC_INCLUDE_DIR})

 ExternalProject_Add(
 extern_dgc
 ${EXTERNAL_PROJECT_LOG_ARGS}
 URL ${DGC_URL}
-URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251"
+URL_MD5 "ede459281a0f979da8d84f81287369ff"
 PREFIX "${DGC_PREFIX_DIR}"
 CONFIGURE_COMMAND ""
 BUILD_COMMAND make -j${NPROC}
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
 set(XPU_BASE_URL_WITHOUT_DATE
 "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221124")
+set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221201")
 else()
 set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
12 changes: 3 additions & 9 deletions paddle/fluid/framework/details/CMakeLists.txt
@@ -92,8 +92,7 @@ if(WITH_GPU)
 memory
 dynload_cuda
 variable_visitor
-place
-device_memory_aligment)
+place)
 nv_library(
 grad_merge_all_reduce_op_handle
 SRCS grad_merge_all_reduce_op_handle.cc
@@ -105,7 +104,6 @@ if(WITH_GPU)
 dynload_cuda
 variable_visitor
 place
-device_memory_aligment
 all_reduce_op_handle
 fused_all_reduce_op_handle)

@@ -170,8 +168,7 @@ elseif(WITH_ROCM)
 memory
 dynload_cuda
 variable_visitor
-place
-device_memory_aligment)
+place)
 hip_library(
 grad_merge_all_reduce_op_handle
 SRCS grad_merge_all_reduce_op_handle.cc
@@ -183,7 +180,6 @@ elseif(WITH_ROCM)
 dynload_cuda
 variable_visitor
 place
-device_memory_aligment
 all_reduce_op_handle
 fused_all_reduce_op_handle)

@@ -233,8 +229,7 @@ else()
 ddim
 memory
 variable_visitor
-place
-device_memory_aligment)
+place)
 cc_library(
 grad_merge_all_reduce_op_handle
 SRCS grad_merge_all_reduce_op_handle.cc
@@ -245,7 +240,6 @@ else()
 memory
 variable_visitor
 place
-device_memory_aligment
 all_reduce_op_handle
 fused_all_reduce_op_handle)
 if(WITH_DISTRIBUTE)
7 changes: 3 additions & 4 deletions paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
@@ -16,9 +16,9 @@
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
-#include "paddle/fluid/platform/device_memory_aligment.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
+#include "paddle/phi/backends/device_memory_aligment.h"

 DEFINE_bool(skip_fused_all_reduce_check, false, "");
 DECLARE_bool(allreduce_record_one_event);
@@ -247,7 +247,7 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
 for (size_t k = 1; k < g_tensor.size(); ++k) {
 const void *cur_address = g_tensor.at(k - 1).second->data();
 int64_t len = g_tensor.at(k - 1).second->numel();
-auto offset = platform::Alignment(len * size_of_dtype, places_[0]);
+auto offset = phi::Alignment(len * size_of_dtype, places_[0]);
 void *infer_next_address = reinterpret_cast<void *>(
 reinterpret_cast<uintptr_t>(cur_address) + offset);
 const void *next_address = g_tensor.at(k).second->data();
@@ -400,8 +400,7 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
 "The size of grad tensors of fused_all_reduce_op_handle "
 "must be > 0, but got %d.",
 len));
-*numel +=
-platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype;
+*numel += phi::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype;
 }
 }

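For context on the change above: phi::Alignment, like the platform::Alignment call it replaces, rounds each gradient's byte size up to the device memory alignment, which is how the handle predicts where the next chunk of the fused buffer starts. The following is a minimal standalone sketch of that round-up arithmetic only; the AlignUp helper and the 256-byte alignment value are illustrative assumptions, not Paddle's actual API or per-device constants.

// Illustrative sketch only -- not phi::Alignment. Assumes a hypothetical
// 256-byte device alignment; real values depend on the target place.
#include <cassert>
#include <cstddef>

constexpr std::size_t kAssumedAlignment = 256;  // assumption for the example

// Round size up to the next multiple of alignment.
inline std::size_t AlignUp(std::size_t size, std::size_t alignment) {
  return ((size + alignment - 1) / alignment) * alignment;
}

int main() {
  // 1000 fp32 elements occupy 4000 bytes; the padded slot in a fused
  // buffer ends at the next alignment boundary, so the following tensor
  // is expected at cur_address + 4096 rather than cur_address + 4000.
  const std::size_t numel = 1000, size_of_dtype = 4;
  const std::size_t offset = AlignUp(numel * size_of_dtype, kAssumedAlignment);
  assert(offset == 4096);
  return 0;
}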
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -104,6 +104,7 @@ pass_library(delete_c_identity_op_pass inference)
 pass_library(preln_residual_bias_fuse_pass inference)
 pass_library(delete_fill_constant_op_pass inference)
 pass_library(constant_folding_pass inference)
+pass_library(float_to_half_pass inference)
 pass_library(conv2d_fusion_layout_transfer_pass inference)
 pass_library(simplify_with_basic_ops_pass base)
 pass_library(fc_elementwise_layernorm_fuse_pass base)
@@ -135,6 +136,7 @@ if(WITH_TENSORRT)
 pass_library(remove_padding_recover_padding_pass inference)
 pass_library(delete_remove_padding_recover_padding_pass inference)
 pass_library(layernorm_shift_partition_fuse_pass inference)
+pass_library(reverse_roll_fuse_pass inference)
 pass_library(preln_layernorm_x_fuse_pass inference)
 endif()
