Merge branch 'multnormal_api' of https://github.com/dasenCoding/Paddle into multnormal_api
dasenCoding committed Nov 25, 2022
2 parents 29b0578 + 006d08a commit 30c925e
Showing 1,202 changed files with 32,739 additions and 30,167 deletions.
6 changes: 5 additions & 1 deletion .flake8
@@ -23,7 +23,7 @@ ignore =

# F, see https://flake8.pycqa.org/en/latest/user/error-codes.html
F405,
F811,F821,F841,
F811,F841,

# W, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
W503
@@ -33,3 +33,7 @@ per-file-ignores =
python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py:E101,W191
# Ignore unused imports in __init__.py
__init__.py: F401
# Ignore undefined variables in CMake config and some dygraph_to_static tests
.cmake-format.py: F821
python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py: F821
python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py: F821
1 change: 1 addition & 0 deletions AUTHORS.md
@@ -20,6 +20,7 @@ This is an incomplete list of authors of [Paddle](https://github.com/PaddlePaddl
| dragonwarrior | Long Wang |
| dyning | Yuning Du |
| emailweixu | Wei Xu |
| engineer1109 | Jia-Liang Wang |
| gangliao | Gang Liao |
| gongweibao | Wei-Bao Gong |
| guru4elephant | Daxiang Dong |
48 changes: 12 additions & 36 deletions cmake/external/xpu.cmake
@@ -10,80 +10,55 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221110")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221124")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()

# ubuntu and centos: use output by XDNN API team
if(NOT DEFINED XPU_XDNN_BASE_URL)
set(XPU_XDNN_BASE_URL_WITHOUT_DATE
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221109")
else()
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()

set(XPU_XCCL_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.0")

if(WITH_AARCH64)
set(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
set(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
set(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_SUNWAY)
set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
set(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
set(XPU_XDNN_URL
"${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_BDCENTOS)
set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
# ubuntu and centos: use output by XDNN API team
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_UBUNTU)
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
# ubuntu and centos: use output by XDNN API team
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_CENTOS)
set(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
# ubuntu and centos: use output by XDNN API team
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
else()
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
# default: use output by XDNN API team
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
endif()

set(XPU_XRE_URL
"${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XDNN_URL
"${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XCCL_URL
"${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_PACK_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh"
CACHE STRING "" FORCE)
set(XPU_CHECK_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/check_xpu_dependence.sh"
CACHE STRING "" FORCE)

set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
@@ -108,9 +83,10 @@ ExternalProject_Add(
PREFIX ${SNAPPY_PREFIX_DIR}
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
DOWNLOAD_COMMAND
wget ${XPU_PACK_DEPENCE_URL} && bash pack_paddle_depence.sh ${XPU_XRE_URL}
${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL}
${XPU_XCCL_DIR_NAME}
wget ${XPU_CHECK_DEPENCE_URL} && bash check_xpu_dependence.sh
${XPU_BASE_URL} ${XPU_XCCL_BASE_URL} && wget ${XPU_PACK_DEPENCE_URL} && bash
pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL}
${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME}
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
18 changes: 3 additions & 15 deletions paddle/fluid/distributed/collective/BKCLTools.h
@@ -77,23 +77,11 @@ class XPUEventManager {
device_index_));

platform::XPUDeviceGuard guard(device_index_);
PADDLE_ENFORCE_XPU_SUCCESS(xpu_event_record(event_, ctx.stream()));
// TODO(zhangxiaoci) temporary solution: xpu::event seems buggy
PADDLE_ENFORCE_XPU_SUCCESS(xpu_wait(ctx.stream()));
}

void Block(const XPUContext& ctx) const {
if (is_created_) {
auto device_index = ctx.GetPlace().device;
PADDLE_ENFORCE_EQ(device_index,
device_index_,
platform::errors::PreconditionNotMet(
"XPUContext's device %d does not match"
"Event's device %d",
device_index,
device_index_));
platform::XPUDeviceGuard guard(device_index_);
PADDLE_ENFORCE_XPU_SUCCESS(xpu_stream_wait_event(ctx.stream(), event_));
}
}
void Block(const XPUContext& ctx) const {}

private:
bool is_created_{false};
104 changes: 104 additions & 0 deletions paddle/fluid/distributed/collective/NCCLTools.cc
@@ -44,5 +44,109 @@ std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) {
return oss.str();
}

void StaticCheckTensor(const phi::DenseTensor& tensor,
int rank,
int world_size) {
// place check
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(tensor.place()),
true,
platform::errors::InvalidArgument("Tensor should be in GPU place."));
// rank check
PADDLE_ENFORCE_GE(rank,
0,
platform::errors::InvalidArgument(
"Rank should be greater than or equal to 0."));
PADDLE_ENFORCE_LT(
rank,
world_size,
platform::errors::InvalidArgument("Rank is out of the process group."));
}

// static check for collective
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size,
int out_size_factor,
int in_size_factor) {
// place check
PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_tensor.place()),
true,
platform::errors::InvalidArgument(
"Output tensor should be in GPU place."));
PADDLE_ENFORCE_EQ(platform::is_gpu_place(in_tensor.place()),
true,
platform::errors::InvalidArgument(
"Input tensor should be in GPU place."));
// rank check
PADDLE_ENFORCE_GE(rank,
0,
platform::errors::InvalidArgument(
"Rank should be greater than or equal to 0."));
PADDLE_ENFORCE_LT(
rank,
world_size,
platform::errors::InvalidArgument("Rank is out of the process group."));
// shape check
int64_t out_size = out_tensor.numel();
PADDLE_ENFORCE_GT(out_size,
0,
platform::errors::InvalidArgument(
"Size of output tensor should be greater than 0."));
int64_t in_size = in_tensor.numel();
PADDLE_ENFORCE_GT(in_size,
0,
platform::errors::InvalidArgument(
"Size of input tensor should be greater than 0."));
PADDLE_ENFORCE_EQ(
out_size * out_size_factor,
in_size * in_size_factor,
platform::errors::InvalidArgument(
"Input and output tensors should have matching sizes."));
// dtype check
PADDLE_ENFORCE_EQ(
out_tensor.dtype(),
in_tensor.dtype(),
platform::errors::InvalidArgument(
"Input and output tensors should have the same data type."));
}

void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ 1,
/*in_size_factor*/ 1);
}

void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ world_size,
/*in_size_factor*/ 1);
}

void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size) {
StaticCheckTensors(out_tensor,
in_tensor,
rank,
world_size,
/*out_size_factor*/ 1,
/*in_size_factor*/ world_size);
}

} // namespace distributed
} // namespace paddle
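Aside: the three shape helpers above differ only in the size factors they pass to StaticCheckTensors, which enforces out_size * out_size_factor == in_size * in_size_factor. A self-contained sketch of that arithmetic, with plain integers standing in for tensor element counts (illustrative only, not Paddle code):

```cpp
#include <cassert>

// Mirrors the invariant StaticCheckTensors enforces:
//   out_size * out_size_factor == in_size * in_size_factor
bool sizes_match(long long out_size, long long in_size,
                 int out_size_factor, int in_size_factor) {
  return out_size * out_size_factor == in_size * in_size_factor;
}

int main() {
  const int world_size = 4;
  const long long n = 1024;  // elements held by one rank

  // Same-shape collectives (factors 1/1): output matches input.
  assert(sizes_match(n, n, 1, 1));

  // Scatter-like (factors world_size/1): each rank's output is
  // 1/world_size of the input, so out_size * world_size == in_size.
  assert(sizes_match(n, n * world_size, world_size, 1));

  // Gather-like (factors 1/world_size): the output concatenates
  // world_size inputs, so out_size == in_size * world_size.
  assert(sizes_match(n * world_size, n, 1, world_size));
  return 0;
}
```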
30 changes: 29 additions & 1 deletion paddle/fluid/distributed/collective/NCCLTools.h
@@ -47,7 +47,7 @@
namespace paddle {
namespace distributed {

#define NCCLCHECK(cmd) \
#define NCCL_CHECK(cmd) \
do { \
ncclResult_t r = cmd; \
if (r != ncclSuccess) { \
@@ -60,7 +60,35 @@ namespace distributed {
} while (0)

ncclRedOp_t ToNCCLRedType(ReduceOp reduction);

std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID);

// static check for p2p
void StaticCheckTensor(const phi::DenseTensor& tensor,
int rank,
int world_size);

// static check for collective
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size,
int out_size_factor,
int in_size_factor);

void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);

void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);

void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
const phi::DenseTensor& in_tensor,
int rank,
int world_size);
} // namespace distributed
} // namespace paddle
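For reference, the renamed NCCL_CHECK macro wraps any call that returns ncclResult_t. A minimal usage sketch against the real NCCL API (assumes nccl.h is available and the macro above is in scope; the helper function is hypothetical):

```cpp
#include <nccl.h>

// Hypothetical helper: ncclGetUniqueId returns ncclResult_t,
// so it can be wrapped by NCCL_CHECK directly.
void InitUniqueId(ncclUniqueId* id) {
  NCCL_CHECK(ncclGetUniqueId(id));
}
```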
16 changes: 5 additions & 11 deletions paddle/fluid/distributed/collective/ProcessGroup.cc
@@ -35,17 +35,6 @@ void ProcessGroup::Task::Synchronize() {}

void ProcessGroup::Task::UpdateWaitChain(const phi::DeviceContext& ctx) {}

ProcessGroup::ProcessGroup(int rank,
int size,
const platform::Place& place,
int gid)
: rank_(rank), size_(size), place_(place), gid_(gid) {
if (gid != IGNORE_ID) {
auto map = ProcessGroupMapFromGid::getInstance();
map->insert(gid_, this);
}
}

ProcessGroup::ProcessGroup(int rank, int size, int gid)
: rank_(rank), size_(size), gid_(gid) {
if (gid != IGNORE_ID) {
@@ -66,5 +55,10 @@ ProcessGroup::Task::Task(int rank,
bool sync_op)
: rank_(rank), comm_type_(comm_type), sync_op_(sync_op) {}

ProcessGroupIdMap& ProcessGroupIdMap::GetInstance() {
static ProcessGroupIdMap instance;
return instance;
}

} // namespace distributed
} // namespace paddle
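The new ProcessGroupIdMap::GetInstance uses a function-local static (the "Meyers singleton"), whose initialization C++11 guarantees to run exactly once even under concurrent first use. A standalone sketch of the same pattern with illustrative names (not Paddle code):

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

// Function-local static: constructed on first call; thread-safe
// initialization is guaranteed since C++11 ("magic statics").
class IdMap {
 public:
  static IdMap& GetInstance() {
    static IdMap instance;
    return instance;
  }
  std::unordered_map<int, std::string>& data() { return map_; }

 private:
  IdMap() = default;  // private ctor: instances only via GetInstance()
  std::unordered_map<int, std::string> map_;
};

int main() {
  IdMap::GetInstance().data()[0] = "default_group";
  std::cout << IdMap::GetInstance().data()[0] << "\n";  // same instance
  return 0;
}
```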