Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Browse files Browse the repository at this point in the history
… conv_extra_input
  • Loading branch information
zyfncg committed Nov 29, 2022
2 parents 3c0d756 + a559a66 commit 03c86be
Show file tree
Hide file tree
Showing 3,698 changed files with 39,476 additions and 39,111 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ paddle/fluid/operators/generated_op.cc
paddle/fluid/operators/generated_sparse_op.cc
paddle/phi/ops/compat/generated_sig.cc
paddle/phi/ops/compat/generated_sparse_sig.cc
paddle/phi/api/yaml/parsed_apis/
paddle/fluid/operators/generator/parsed_ops/
paddle/fluid/pybind/tmp_eager_op_function_impl.h
paddle/fluid/pybind/eager_op_function_impl.h
Expand Down
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ repos:
hooks:
- id: black
files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
# isort: keeps Python imports sorted/grouped; runs before flake8 so lint
# sees the final import order.
- repo: https://github.com/pycqa/isort
  rev: 5.10.1
  hooks:
  - id: isort
- repo: https://github.com/PyCQA/flake8
  rev: 4.0.1
  hooks:
Expand Down
4 changes: 2 additions & 2 deletions cmake/copyfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Standard-library imports only, one per line, alphabetically sorted
# (isort style); the diff overlay had duplicated glob/sys lines.
import glob
import os
import shutil
import sys


def main():
Expand Down
62 changes: 32 additions & 30 deletions cmake/external/lite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ if(LITE_WITH_XPU)
endif()
endif()

# Select the Paddle-Lite publish directory name; it depends on the target
# architecture and which accelerator backend is enabled.
# NOTE(review): when WITH_ARM and LITE_WITH_NNADAPTER are set but
# NNADAPTER_WITH_HUAWEI_ASCEND_NPU is not, LITE_OUTPUT_BIN_DIR is left
# unset by this branch — confirm that is intended.
if(WITH_ARM)
  if(LITE_WITH_XPU)
    set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
  elseif(LITE_WITH_NNADAPTER)
    message("Enable LITE_WITH_NNADAPTER")
    if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
      set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
    endif()
  else()
    # Plain ARM Linux build without an accelerator backend.
    set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
  endif()
else()
  # Non-ARM (x86) host build.
  set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
endif()

if(LITE_WITH_NNADAPTER)
add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
Expand All @@ -50,6 +65,12 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
set(LITE_PROJECT extern_lite)
set(LITE_PREFIX_DIR ${THIRD_PARTY_PATH}/lite)
set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)
set(LITE_BINARY_DIR ${LITE_PREFIX_DIR}/src/extern_lite-build)
set(LITE_SOURCE_DIR ${LITE_PREFIX_DIR}/src/extern_lite)

set(LITE_SHARED_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)

if(NOT LITE_GIT_TAG)
set(LITE_GIT_TAG 81ef66554099800c143a0feff6e0a491b3b0d12e)
Expand All @@ -61,7 +82,8 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)

# No quotes, so cmake can resolve it as a command with arguments.
if(WITH_ARM)
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target
publish_inference -j)
message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}")
set(LITE_OPTIONAL_ARGS
-DWITH_MKL=OFF
Expand Down Expand Up @@ -110,9 +132,11 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS})
${LITE_OPTIONAL_ARGS}
BUILD_BYPRODUCTS ${LITE_SHARED_LIB})
else()
set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j)
set(LITE_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target
publish_inference -j)
set(LITE_OPTIONAL_ARGS
-DWITH_MKL=ON
-DLITE_WITH_CUDA=OFF
Expand Down Expand Up @@ -157,28 +181,9 @@ if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
${LITE_OPTIONAL_ARGS})
${LITE_OPTIONAL_ARGS}
BUILD_BYPRODUCTS ${LITE_SHARED_LIB})
endif()
ExternalProject_Get_Property(${LITE_PROJECT} BINARY_DIR)
ExternalProject_Get_Property(${LITE_PROJECT} SOURCE_DIR)
set(LITE_BINARY_DIR ${BINARY_DIR})
set(LITE_SOURCE_DIR ${SOURCE_DIR})

endif()

if(WITH_ARM)
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib)
endif()

message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
Expand All @@ -199,12 +204,9 @@ function(external_lite_libs alias path)
endfunction()

# Register the Paddle-Lite full-API shared library as an imported target.
# The diff overlay left both the old (lite_full_static) and new
# (lite_full_shared) alias lines; only the new alias — the one referenced
# by LITE_DEPS below — is kept.
external_lite_libs(
  lite_full_shared
  ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)
set(LITE_SHARED_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so
)

if(LITE_WITH_NNADAPTER)
set(LITE_NNADAPTER_LIB
Expand All @@ -214,13 +216,13 @@ if(LITE_WITH_NNADAPTER)
lite_nnadapter
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so)
set(LITE_DEPS lite_full_static lite_nnadapter)
set(LITE_DEPS lite_full_shared lite_nnadapter)
set(LITE_NNADAPTER_NPU_LIB
${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so
)
endif()
else()
set(LITE_DEPS lite_full_static)
set(LITE_DEPS lite_full_shared)
endif()

add_definitions(-DPADDLE_WITH_LITE)
Expand Down
7 changes: 3 additions & 4 deletions cmake/external/rocksdb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,17 @@ ExternalProject_Add(
-DWITH_GFLAGS=OFF
-DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
# BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a
INSTALL_COMMAND
mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include
${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
BYPRODUCTS ${ROCKSDB_LIBRARIES})

add_dependencies(extern_rocksdb snappy)
BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})

add_library(rocksdb STATIC IMPORTED GLOBAL)

add_dependencies(extern_rocksdb snappy)
set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES})
add_dependencies(rocksdb extern_rocksdb)

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
# Default the XPU SDK download URL to the dated snapshot unless the caller
# supplied one. The diff overlay contained both the old (20221116) and new
# (20221124) set() lines; the committed state keeps only 20221124.
if(NOT DEFINED XPU_BASE_URL)
  set(XPU_BASE_URL_WITHOUT_DATE
      "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221124")
else()
  # Normalize a caller-provided value (no-op reassignment).
  set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
Expand Down
104 changes: 104 additions & 0 deletions paddle/fluid/distributed/collective/NCCLTools.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,109 @@ std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) {
return oss.str();
}

// Static (pre-launch) check for a single tensor used in a point-to-point
// call: the tensor must reside in GPU memory and `rank` must lie in
// [0, world_size). Fails via PADDLE_ENFORCE on violation.
void StaticCheckTensor(const phi::DenseTensor& tensor,
                       int rank,
                       int world_size) {
  // place check: communication buffers must be GPU-resident.
  PADDLE_ENFORCE_EQ(
      platform::is_gpu_place(tensor.place()),
      true,
      platform::errors::InvalidArgument("Tensor should be in GPU place."));
  // rank check: 0 <= rank < world_size.
  PADDLE_ENFORCE_GE(rank,
                    0,
                    platform::errors::InvalidArgument(
                        "Rank should be greater than or equal to 0."));
  PADDLE_ENFORCE_LT(
      rank,
      world_size,
      platform::errors::InvalidArgument("Rank is out of the process group."));
}

// Core static check for collective ops. Validates, for both tensors:
// GPU placement, rank in [0, world_size), non-empty sizes, the size
// relation `out_size * out_size_factor == in_size * in_size_factor`
// (factors let scatter/gather-like ops relate unequal sizes), and
// matching dtypes. Fails via PADDLE_ENFORCE on any violation.
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
                        const phi::DenseTensor& in_tensor,
                        int rank,
                        int world_size,
                        int out_size_factor,
                        int in_size_factor) {
  // place check: both buffers must be GPU-resident.
  PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_tensor.place()),
                    true,
                    platform::errors::InvalidArgument(
                        "Output tensor should be in GPU place."));
  PADDLE_ENFORCE_EQ(platform::is_gpu_place(in_tensor.place()),
                    true,
                    platform::errors::InvalidArgument(
                        "Input tensor should be in GPU place."));
  // rank check: 0 <= rank < world_size.
  PADDLE_ENFORCE_GE(rank,
                    0,
                    platform::errors::InvalidArgument(
                        "Rank should be greater than or equal to 0."));
  PADDLE_ENFORCE_LT(
      rank,
      world_size,
      platform::errors::InvalidArgument("Rank is out of the process group."));
  // shape check: both tensors must be non-empty and their element counts
  // must agree after scaling by the per-op factors.
  int64_t out_size = out_tensor.numel();
  PADDLE_ENFORCE_GT(out_size,
                    0,
                    platform::errors::InvalidArgument(
                        "Size of output tensor should be greater than 0."));
  int64_t in_size = in_tensor.numel();
  PADDLE_ENFORCE_GT(in_size,
                    0,
                    platform::errors::InvalidArgument(
                        "Size of input tensor should be greater than 0."));
  PADDLE_ENFORCE_EQ(
      out_size * out_size_factor,
      in_size * in_size_factor,
      platform::errors::InvalidArgument(
          "Input and output tensors should have matching sizes."));
  // dtype check: collectives do not convert element types.
  PADDLE_ENFORCE_EQ(
      out_tensor.dtype(),
      in_tensor.dtype(),
      platform::errors::InvalidArgument(
          "Input and output tensors should have the same data type."));
}

// Checks for same-shape collectives (e.g. allreduce): input and output
// element counts must match one-to-one, so both size factors are 1.
void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
                                 const phi::DenseTensor& in_tensor,
                                 int rank,
                                 int world_size) {
  constexpr int kOutFactor = 1;
  constexpr int kInFactor = 1;
  StaticCheckTensors(
      out_tensor, in_tensor, rank, world_size, kOutFactor, kInFactor);
}

// Checks for scatter-like collectives: the input holds world_size chunks,
// so out_size * world_size must equal in_size.
void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
                                        const phi::DenseTensor& in_tensor,
                                        int rank,
                                        int world_size) {
  const int out_factor = world_size;
  constexpr int kInFactor = 1;
  StaticCheckTensors(
      out_tensor, in_tensor, rank, world_size, out_factor, kInFactor);
}

// Checks for gather-like collectives: the output holds world_size chunks,
// so out_size must equal in_size * world_size.
void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
                                       const phi::DenseTensor& in_tensor,
                                       int rank,
                                       int world_size) {
  constexpr int kOutFactor = 1;
  const int in_factor = world_size;
  StaticCheckTensors(
      out_tensor, in_tensor, rank, world_size, kOutFactor, in_factor);
}

} // namespace distributed
} // namespace paddle
27 changes: 27 additions & 0 deletions paddle/fluid/distributed/collective/NCCLTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,32 @@ ncclRedOp_t ToNCCLRedType(ReduceOp reduction);

std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID);

// Static check for p2p (send/recv): GPU placement and rank in
// [0, world_size).
void StaticCheckTensor(const phi::DenseTensor& tensor,
                       int rank,
                       int world_size);

// Core static check for collectives: placement, rank range, non-empty
// sizes, the relation out_size * out_size_factor == in_size *
// in_size_factor, and matching dtypes.
void StaticCheckTensors(const phi::DenseTensor& out_tensor,
                        const phi::DenseTensor& in_tensor,
                        int rank,
                        int world_size,
                        int out_size_factor,
                        int in_size_factor);

// Same-shape collectives (both factors 1).
void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
                                 const phi::DenseTensor& in_tensor,
                                 int rank,
                                 int world_size);

// Scatter-like collectives (input is world_size times the output).
void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
                                        const phi::DenseTensor& in_tensor,
                                        int rank,
                                        int world_size);

// Gather-like collectives (output is world_size times the input).
void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
                                       const phi::DenseTensor& in_tensor,
                                       int rank,
                                       int world_size);
} // namespace distributed
} // namespace paddle
51 changes: 51 additions & 0 deletions paddle/fluid/distributed/collective/ProcessGroupBKCL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,57 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::AllGather(
use_calc_stream);
}

// Reduce across all ranks with opts.reduce_op; only the root rank
// receives the result in *out_tensor. BKCL is driven through an
// all_reduce into a scratch tensor because no direct reduce path is
// used here; non-root ranks leave their output untouched.
std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Reduce(
    phi::DenseTensor* out_tensor,
    const phi::DenseTensor& in_tensor,
    const ReduceOptions& opts,
    bool sync_op,
    bool use_calc_stream) {
  return Collective(
      out_tensor,
      in_tensor,
      [&](phi::DenseTensor* output,
          const phi::DenseTensor& input,
          BKCLContext_t comm,
          const XPUStream& stream) {
        // Scratch tensor receiving the all_reduce result on every rank.
        phi::DenseTensor output_t(*output);
        const auto& place = input.place();
        auto* calc_ctx = static_cast<phi::XPUContext*>(
            platform::DeviceContextPool::Instance().Get(place));
        // Allocate the scratch buffer with the input's dtype. An
        // unsupported dtype must fail fast: the original code only
        // logged via VLOG(0) and fell through, letting bkcl_all_reduce
        // write through an unallocated output_t buffer.
        switch (input.dtype()) {
          case phi::DataType::FLOAT32:
            calc_ctx->template Alloc<float>(&output_t);
            break;
          case phi::DataType::FLOAT16:
            calc_ctx->template Alloc<float16>(&output_t);
            break;
          case phi::DataType::INT32:
            calc_ctx->template Alloc<int>(&output_t);
            break;
          default:
            PADDLE_THROW(platform::errors::InvalidArgument(
                "Data type not supported in Reduce for %s.",
                GetBackendName()));
        }
        int ret =
            bkcl_all_reduce(comm,
                            input.data(),
                            output_t.data(),
                            input.numel(),
                            platform::ToBKCLDataType(
                                framework::TransToProtoVarType(input.type())),
                            ToBKCLRedType(opts.reduce_op),
                            stream);
        // Reduce semantics: only the root rank publishes the result.
        if (rank_ == opts.root_rank) {
          *output = output_t;
        }
        return ret;
      },
      CommType::ALLREDUCE,  // emulated via all_reduce; no REDUCE comm type here
      sync_op,
      use_calc_stream);
}

std::shared_ptr<ProcessGroup::Task> ProcessGroupBKCL::Barrier(
const BarrierOptions& opts) {
PADDLE_ENFORCE_GE(opts.device_id,
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/distributed/collective/ProcessGroupBKCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ class ProcessGroupBKCL : public ProcessGroupStream {
bool sync_op,
bool use_calc_stream) override;

// Reduce in_tensor across ranks with opts.reduce_op; the result is
// written to *out_tensor on the root rank (opts.root_rank) only.
std::shared_ptr<ProcessGroup::Task> Reduce(phi::DenseTensor* out_tensor,
                                           const phi::DenseTensor& in_tensor,
                                           const ReduceOptions& opts,
                                           bool sync_op,
                                           bool use_calc_stream) override;

std::shared_ptr<ProcessGroup::Task> Barrier(
const BarrierOptions& = BarrierOptions()) override;

Expand Down
Loading

0 comments on commit 03c86be

Please sign in to comment.