Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev/enable_standalone_executor_test
zhiqiu · Mar 23, 2022 · 81e84e0 · 81e84e0
2 parents a4fa1e2 + f4075db
commit 81e84e0
Show file tree

Hide file tree

Showing 1,187 changed files with 57,756 additions and 27,013 deletions.
diff --git a/.gitignore b/.gitignore
@@ -52,12 +52,12 @@ tools/__pycache__
 
 # This file is automatically generated.
 # TODO(zhiqiang) Move this file to build directory.
-paddle/infrt/dialect/pd_ops.td
+paddle/infrt/dialect/pd/ir/pd_ops.td
 paddle/infrt/dialect/phi/ir/phi_cpu_kernels.td
 paddle/infrt/dialect/phi/ir/phi_gpu_kernels.td
 tools/infrt/kernels.json
 tools/infrt/kernel_signature.json
-paddle/infrt/dialect/pd_ops_info.h
+paddle/infrt/dialect/pd/common/pd_ops_info.h
 .lit_test_times.txt
 paddle/infrt/tests/dialect/Output
 paddle/infrt/tests/lit.cfg.py

diff --git a/README.md b/README.md
@@ -14,9 +14,8 @@ English | [简体中文](./README_cn.md)
 
 Welcome to the PaddlePaddle GitHub.
 
-PaddlePaddle, as the only independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
-PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 4 million developers. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
-
+PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
+PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 4 million developers, 157,000 companies and generating 476,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
 
 
 ## Installation

diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
@@ -11,7 +11,7 @@ elseif(NEW_RELEASE_ALL)
   add_definitions(-DNEW_RELEASE_ALL)
   set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
   set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
-  set(paddle_known_gpu_archs11 "35 50 52 60 61 70 75 80")
+  set(paddle_known_gpu_archs11 "35 50 60 61 70 75 80")
 elseif(NEW_RELEASE_PYPI)
   message("Using New Release Strategy - Cubin Packge")
   add_definitions(-DNEW_RELEASE_PYPI)

diff --git a/cmake/external/paddle2onnx.cmake b/cmake/external/paddle2onnx.cmake
@@ -61,6 +61,7 @@ set(PADDLE2ONNX_OPTIONAL_ARGS
       -DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
       -DWITH_STATIC=OFF
       -DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
+      -DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}
       -DCMAKE_POSITION_INDEPENDENT_CODE=ON
       -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
       ${EXTERNAL_OPTIONAL_ARGS}

diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
@@ -258,6 +258,12 @@ copy(inference_lib_dist
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
+copy(inference_lib_dist
+        SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h
+        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
+        copy(inference_lib_dist
+        SRCS  ${PADDLE_SOURCE_DIR}/paddle/utils/none.h
+        DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/)
 copy(inference_lib_dist
         SRCS  ${PADDLE_SOURCE_DIR}/paddle/extension.h
         DSTS  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/)

diff --git a/cmake/phi.cmake b/cmake/phi.cmake
@@ -118,7 +118,7 @@ function(kernel_library TARGET)
         if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
             list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
         endif()
-        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
+        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP)
             list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
         endif()
         if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
@@ -151,6 +151,9 @@ function(kernel_library TARGET)
                 file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
                 list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
             endif()
+            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc )
+                list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
+            endif()
         endif()
     else()
         # TODO(chenweihang): impl compile by source later

diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt
@@ -1,5 +1,5 @@
 cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
-cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api)
+cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
 
 if (WITH_DISTRIBUTE)
   cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)

diff --git a/paddle/fluid/distributed/collective/ProcessGroupGloo.cc b/paddle/fluid/distributed/collective/ProcessGroupGloo.cc
@@ -171,10 +171,10 @@ ProcessGroupGloo::GlooTask::GlooTask(int rank,
                         "Only CPU place is supported for ProcessGroupGloo."));
 }
 
-ProcessGroupGloo::ProcessGroupGloo(const std::shared_ptr<GlooStore>& store,
-                                   int rank, int world_size,
-                                   const std::shared_ptr<GlooOptions> options)
-    : ProcessGroup(rank, world_size), _tag(0), _store(store) {
+ProcessGroupGloo::ProcessGroupGloo(
+    const std::shared_ptr<paddle::distributed::Store>& store, int rank,
+    int world_size, const std::shared_ptr<GlooOptions> options)
+    : ProcessGroup(rank, world_size), _tag(0), _store(new GlooStore(store)) {
   _context = std::make_shared<gloo::rendezvous::Context>(rank, world_size);
   auto prefix_store =
       ::gloo::rendezvous::PrefixStore(std::to_string(0), *_store);

diff --git a/paddle/fluid/distributed/collective/ProcessGroupGloo.h b/paddle/fluid/distributed/collective/ProcessGroupGloo.h
@@ -52,8 +52,7 @@ class ProcessGroupGloo : public ProcessGroup {
 
   class GlooStore : public ::gloo::rendezvous::Store {
    public:
-    explicit GlooStore(
-        const std::shared_ptr<paddle::distributed::TCPStore>& store)
+    explicit GlooStore(const std::shared_ptr<paddle::distributed::Store>& store)
         : _store(store) {}
 
     ~GlooStore() = default;
@@ -87,7 +86,7 @@ class ProcessGroupGloo : public ProcessGroup {
     }
 
    protected:
-    std::shared_ptr<paddle::distributed::TCPStore> _store;
+    std::shared_ptr<paddle::distributed::Store> _store;
   };
 
   class GlooOptions {
@@ -100,9 +99,9 @@ class ProcessGroupGloo : public ProcessGroup {
     std::shared_ptr<::gloo::transport::Device> device;
   };
 
-  explicit ProcessGroupGloo(const std::shared_ptr<GlooStore>& store, int rank,
-                            int world_size,
-                            std::shared_ptr<GlooOptions> options);
+  explicit ProcessGroupGloo(
+      const std::shared_ptr<paddle::distributed::Store>& store, int rank,
+      int world_size, std::shared_ptr<GlooOptions> options);
 
   ~ProcessGroupGloo() = default;
 
@@ -145,7 +144,7 @@ class ProcessGroupGloo : public ProcessGroup {
  protected:
   uint32_t _tag;
   std::shared_ptr<gloo::rendezvous::Context> _context;
-  std::shared_ptr<GlooStore> _store;
+  std::shared_ptr<::gloo::rendezvous::Store> _store;
 };
 
 }  // namespace distributed

diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc
@@ -395,7 +395,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::Barrier(
   platform::CUDADeviceGuard gpuGuard;
   for (auto& place : places) {
     gpuGuard.SetDeviceIndex(place.GetDeviceId());
-    auto dt = full({1}, 0, phi::DataType::FLOAT32, phi::Backend::GPU);
+    auto dt = full({1}, 0, phi::DataType::FLOAT32, phi::GPUPlace());
     barrierTensors.push_back(dt);
   }
   auto task = ProcessGroupNCCL::AllReduce(barrierTensors);
@@ -417,7 +417,7 @@ void CheckTensorsInDifferentDevices(const std::vector<Tensor>& tensors,
   std::set<Place> used_devices;
 
   for (const auto& t : tensors) {
-    PADDLE_ENFORCE_EQ(t.is_cuda() && t.is_dense_tensor(), true,
+    PADDLE_ENFORCE_EQ(t.is_gpu() && t.is_dense_tensor(), true,
                       platform::errors::InvalidArgument(
                           "Tensors must be CUDA and dense tensor."));