
sync branches: develop and infrt #39509

Merged: 59 commits, Feb 16, 2022
9d4d0c3
[Pten] Adjust the Empty dev_api (#39143)
zyfncg Feb 9, 2022
2a5d858
Fix code conflict of empty dev_api (#39430)
zyfncg Feb 10, 2022
63d2333
[PluggableDevice] custom kernel supports multi cpp_dtype registering …
Aganlengzi Feb 10, 2022
c7c1db3
[PTen] Add standard kernel suffix set (#39404)
chenwhql Feb 10, 2022
14ed2f5
[pten] update isnan registration (#39419)
zhiqiu Feb 10, 2022
e8ac7fc
[bf16] add bf16 kernel: dropout & reshape & slice (#39395)
zhangbo9674 Feb 10, 2022
59c7aea
[bf16] add bf16 kernel: squeeze & unsqueeze & stack (#39402)
zhangbo9674 Feb 10, 2022
224bc51
Modify the unsqueeze dimension of input data in conv1d NCL And NLC fo…
Zjq9409 Feb 10, 2022
7b70b79
[Pten] Refactor C++ API code-gen (#39408)
zyfncg Feb 10, 2022
32d79bb
Refactored Python-C Attributes Parsing Functions (#39328)
jim19930609 Feb 10, 2022
c47d672
Add _get_parameter method to Lamb optimizer (#39416)
sneaxiy Feb 10, 2022
52d6b30
mkldnn layout issue fix (#39422)
b3602sss Feb 10, 2022
8b58862
fix compile error on jetson (#39441)
jiweibo Feb 10, 2022
e2ad433
move Masked select to pten (#39193)
phlrain Feb 10, 2022
238f3c8
[PaddlePaddle Hackathon] 31. Add Java frontend for Paddle Inference (…
chenyanlann Feb 10, 2022
f7a3389
fix check error of ResetHolder (#39439)
zyfncg Feb 10, 2022
43f84d0
Added python-c code generation for final state Eager Dygraph (#39233)
jim19930609 Feb 10, 2022
29d3160
change dtype of pooling mask to 'int32' for Paddle2ONNX (#39314)
weisy11 Feb 10, 2022
35b03e1
share MemOptVarInfos of external variables into cinn_launch subgraph …
CtfGo Feb 10, 2022
2b8b16d
[NPU] add reduce_min (#39019)
windstamp Feb 10, 2022
383de29
[MLU] add mlu kernel for accuracy op (#39337)
fwenguang Feb 10, 2022
1252f4b
[Dy2St]Handle `a, b = paddle.shape(x)` in Static Analysis (#39245)
0x45f Feb 10, 2022
7d6096f
[Pten] Auto-Generate InterMeta register (#39436)
zyfncg Feb 11, 2022
bf30503
Support different dtypes of inputs for elementwise ops (#38859)
zhangting2020 Feb 11, 2022
f38c2e5
Add profiler node tree implementation (#39316)
rainyfly Feb 11, 2022
8803f6b
add print pten kernel tool (#39371)
shangzhizhou Feb 11, 2022
7392578
[new-exec] set type of op-kernel op by place (#39458)
zhiqiu Feb 11, 2022
7e52bea
Add log for executor (#39459)
liutiexing Feb 11, 2022
1c44d3e
[Paddle Inference] support ernie quant model with interleaved (#39424)
Wangzheee Feb 11, 2022
22c67d1
Unify ps development - python (#39431)
ziyoujiyi Feb 11, 2022
667bd96
[PTen] Move grad GetExpectedPtenKernelArgs into pten (#39418)
chenwhql Feb 11, 2022
be8ab0e
fix compilation warning on mac (#39438)
Feb 11, 2022
72ad280
get build time (#39368)
lelelelelez Feb 11, 2022
c86765e
fix prelu trt convert (#39389)
JZZ-NOTE Feb 11, 2022
a117497
Optimize bilinear interpolation forward (#39243)
AshburnLee Feb 11, 2022
2ea15fc
Optimize performance of softmax_bwd when axis!=-1 (#38609)
ZzSean Feb 11, 2022
d763a91
[PTen] Remove pten core's dependency on fluid xxx_info.h (#39401)
chenwhql Feb 11, 2022
d25a7f9
[Pten] move operators/math/math_function_* to pten/kernels/func (#39300)
Feb 11, 2022
702bce5
[MLU] add pool2d and pool2d_grad mlu kernel (#39453)
fwenguang Feb 11, 2022
89aa8b1
[MLU]support c_gen_cncl_id_op run on MLU device (#39336)
kangna-qi Feb 11, 2022
1e6047f
[bf16] add bf16 kernel: transpose & unbind (#39457)
zhangbo9674 Feb 11, 2022
02f0670
uniform_random op for mlu (#39450)
joeqiao12 Feb 11, 2022
2db25f0
[MLU] add pool2d pytest (#39454)
fwenguang Feb 11, 2022
52bbaae
Added shape (U)INT8/BF16/FP32 oneDNN kernel (#36033)
jakpiase Feb 11, 2022
575fa0f
move memcpy.h into cc file (#39469)
chenwhql Feb 11, 2022
69793a2
Add TensorRT inspector into Paddle-TRT (#38362)
leo0519 Feb 11, 2022
739da6c
Fix add profiler node tree implementation cmake error (#39474)
rainyfly Feb 11, 2022
bdeb479
unify naming style (#39481)
chenwhql Feb 12, 2022
74a150f
[Pten] Generate Wrapped InferMeta by Yaml (#39482)
zyfncg Feb 13, 2022
ec8a0c1
Adjusted python-level trace_op to accomodate final state Eager Dygrap…
jim19930609 Feb 14, 2022
9722994
Fixed get_tensor method for EagerTensor (#39414)
jim19930609 Feb 14, 2022
db11357
[Approver Update] update check approver of qili93, test=document_fix …
qili93 Feb 14, 2022
1b9e679
[MLU] add mlu kernel for c_broadcast op (#39470)
maxhuiy Feb 14, 2022
9ba3f42
update xpu test build script and fix get_test_cover_info, *test=kunlu…
tangzhiyi11 Feb 14, 2022
d12c363
fix gather_nd, *test=kunlun (#39283)
tangzhiyi11 Feb 14, 2022
d0df563
[pten] add split kernel (#39060)
MingMingShangTian Feb 14, 2022
e07420b
new way of test cases, *test=kunlun (#39444)
helen88 Feb 14, 2022
ddb1e23
[PTen] Add HasAttr for ArgumentMappingContext (#39464)
chenwhql Feb 14, 2022
55da934
[ROCm] fix missing dcu kernel in operator.cmake, test=develop (#39480)
qili93 Feb 14, 2022
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@ paddle/pten/api/lib/api.cc
paddle/pten/api/backward/backward_api.h
paddle/pten/api/lib/backward_api.cc
paddle/pten/include/*
paddle/pten/infermeta/generated.*
paddle/pten/extension.h
paddle/fluid/eager/api/generated/*

1 change: 1 addition & 0 deletions AUTHORS.md
@@ -83,3 +83,4 @@
| jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
| mingxu1067 | Ming Huang (NVIDIA) |
| zlsh80826 | Reese Wang (NVIDIA) |
| leo0519 | Leo Chen (NVIDIA) |
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -242,6 +242,15 @@ option(NEW_RELEASE_CUBIN   "PaddlePaddle next-level release strategy for pypi cu
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON)
option(WITH_RECORD_BUILDTIME "Compile PaddlePaddle with record all targets build time" OFF)

if(WITH_RECORD_BUILDTIME)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
else()
include(ccache) # set ccache for compilation; ccache cannot be used when WITH_RECORD_BUILDTIME=ON
endif()
unset(WITH_RECORD_BUILDTIME CACHE)

# PY_VERSION
if(NOT PY_VERSION)
@@ -382,7 +391,6 @@ if(WITH_PROFILER)
add_definitions(-DWITH_GPERFTOOLS)
endif()

include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
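The new `WITH_RECORD_BUILDTIME` option works by installing `tools/get_build_time.sh` as CMake's global `RULE_LAUNCH_COMPILE`/`RULE_LAUNCH_LINK` launcher, so every compile and link command is prefixed by the script and can be timed. The real launcher is a shell script; the following is only an illustrative Python sketch of the pattern (the function and log-file names are assumptions, not Paddle's):

```python
import subprocess
import sys
import time

def timed_launch(argv, log_path="build_time.log"):
    """Run the real compile/link command and append its wall time to a log.

    Illustrative analogue of tools/get_build_time.sh: a RULE_LAUNCH_COMPILE
    launcher receives the full compiler command line as its arguments,
    must forward it unchanged, and must propagate the exit code.
    """
    start = time.time()
    result = subprocess.run(argv)          # execute the wrapped command
    elapsed = time.time() - start
    with open(log_path, "a") as log:       # one tab-separated record per command
        log.write("%.3f\t%s\n" % (elapsed, " ".join(argv)))
    return result.returncode

# Example: wrap an arbitrary command the way CMake would wrap the compiler.
rc = timed_launch([sys.executable, "-c", "pass"])
```

Because the launcher intercepts every rule, it is mutually exclusive with `ccache` in the build above, which is why the `else()` branch only includes ccache when the option is off.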
1 change: 0 additions & 1 deletion cmake/cuda.cmake
@@ -2,7 +2,6 @@ if(NOT WITH_GPU)
return()
endif()


if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
11 changes: 11 additions & 0 deletions cmake/operators.cmake
@@ -335,6 +335,17 @@ function(op_library TARGET)
endif()
endforeach()

# pybind USE_OP_DEVICE_KERNEL for ROCm
list (APPEND hip_srcs ${hip_cc_srcs})
# message("hip_srcs ${hip_srcs}")
foreach(hip_src ${hip_srcs})
set(op_name "")
find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
set(pybind_flag 1)
endif()
endforeach()

# pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN
list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs})
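The loop added to `operators.cmake` scans each HIP source for a `REGISTER_OP_CUDA_KERNEL(op_name, ...)` registration and, when one is found, appends a matching `USE_OP_DEVICE_KERNEL(op_name, CUDA);` line to the generated pybind file. A hedged Python sketch of that scan-and-emit step (function name and the in-memory `sources` mapping are illustrative; the real logic lives in CMake's `find_register`):

```python
import re

def emit_use_op_device_kernel(sources, device="CUDA"):
    """Scan source texts for REGISTER_OP_CUDA_KERNEL(op_name, ...) and
    emit pybind USE_OP_DEVICE_KERNEL lines, mirroring the cmake
    find_register / file(APPEND ...) loop. `sources` maps file name
    to file contents; this is a sketch, not Paddle's implementation."""
    pattern = re.compile(r"REGISTER_OP_CUDA_KERNEL\(\s*([A-Za-z0-9_]+)\s*,")
    lines = []
    for name, text in sources.items():
        match = pattern.search(text)
        if match:  # analogous to: if op_name was found in the file
            lines.append("USE_OP_DEVICE_KERNEL(%s, %s);" % (match.group(1), device))
    return lines
```

The emitted lines are what lets the Python bindings pull in the device kernels that were registered only in `.hip` sources, which is the missing-DCU-kernel fix referenced by commit 55da934 (#39480).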
@@ -35,12 +35,12 @@ limitations under the License. */
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/pten/kernels/funcs/math_function.h"

#include "paddle/fluid/distributed/ps/service/ps_client.h"

@@ -180,7 +180,7 @@ inline void MergeVars(const std::string &var_name,

// set output tensor to 0.
paddle::platform::CPUDeviceContext cpu_ctx;
paddle::operators::math::SetConstant<paddle::platform::CPUDeviceContext, T>
pten::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
constant_functor;
constant_functor(cpu_ctx, out_t, static_cast<T>(0));
// sum all vars to out
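This hunk only swaps `paddle::operators::math::SetConstant` for the relocated `pten::funcs::SetConstant`; the behavior of `MergeVars` is unchanged: zero the output tensor, then accumulate every input var into it. A minimal Python sketch of that reduction, with plain lists standing in for tensors (names and the equal-shape assumption are illustrative):

```python
def merge_vars(var_list):
    """Sketch of the MergeVars reduction: the output is first set to a
    constant 0 (the SetConstant functor), then every input tensor is
    summed into it element-wise. Assumes all inputs share one shape."""
    assert var_list, "need at least one input var"
    size = len(var_list[0])
    out = [0.0] * size              # SetConstant(ctx, out_t, 0)
    for var in var_list:            # sum all vars into out
        assert len(var) == size
        for i, value in enumerate(var):
            out[i] += value
    return out
```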
@@ -38,9 +38,10 @@
#include "paddle/fluid/distributed/ps/service/ps_service/service.h"
#include "paddle/fluid/distributed/ps/service/sendrecv.pb.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace distributed {
class GraphPyService {
3 changes: 1 addition & 2 deletions paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
@@ -21,8 +21,8 @@ limitations under the License. */
#include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace distributed {
@@ -42,7 +42,6 @@ class DenseTensor;
namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
namespace math = paddle::operators::math;
namespace memory = paddle::memory;
namespace distributed = paddle::distributed;

@@ -22,8 +22,8 @@ limitations under the License. */
#include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
#include "paddle/fluid/distributed/ps/service/env.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace distributed {
@@ -43,7 +43,6 @@ class DenseTensor;
namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
namespace math = paddle::operators::math;
namespace memory = paddle::memory;
namespace distributed = paddle::distributed;

9 changes: 4 additions & 5 deletions paddle/fluid/distributed/test/brpc_utils_test.cc
@@ -17,7 +17,7 @@ limitations under the License. */
#include "gtest/gtest.h"

#include "paddle/fluid/distributed/ps/service/brpc_utils.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace paddle {
namespace framework {
@@ -28,7 +28,6 @@ class Variable;
namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
namespace math = paddle::operators::math;
namespace memory = paddle::memory;
namespace distributed = paddle::distributed;

@@ -42,7 +41,7 @@ void CreateVarsOnScope(framework::Scope* scope, platform::Place* place,
lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
tensor1->set_lod(lod1);
tensor1->mutable_data<float>(*place);
math::set_constant(ctx, tensor1, 31.9);
pten::funcs::set_constant(ctx, tensor1, 31.9);

// var 2
framework::Variable* var2 = scope->Var("x2");
@@ -52,7 +51,7 @@
lod2.push_back(framework::Vector<size_t>({1, 1}));
tensor2->set_lod(lod2);
tensor2->mutable_data<int>(*place);
math::set_constant(ctx, tensor2, 100);
pten::funcs::set_constant(ctx, tensor2, 100);

// var 3
framework::Variable* var3 = scope->Var("x3");
@@ -62,7 +61,7 @@
auto* rows = slr->mutable_rows();
tensor3->Resize(framework::make_ddim({564, 128}));
tensor3->mutable_data<float>(*place);
math::set_constant(ctx, tensor3, 32.7);
pten::funcs::set_constant(ctx, tensor3, 32.7);
for (int i = 0; i < 564; ++i) rows->push_back(i);
}

3 changes: 1 addition & 2 deletions paddle/fluid/distributed/test/graph_node_split_test.cc
@@ -36,14 +36,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
namespace math = paddle::operators::math;
namespace memory = paddle::memory;
namespace distributed = paddle::distributed;

3 changes: 1 addition & 2 deletions paddle/fluid/distributed/test/graph_node_test.cc
@@ -36,14 +36,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/pten/kernels/funcs/math_function.h"

namespace framework = paddle::framework;
namespace platform = paddle::platform;
namespace operators = paddle::operators;
namespace math = paddle::operators::math;
namespace memory = paddle::memory;
namespace distributed = paddle::distributed;

@@ -24,3 +24,13 @@ add_custom_target(eager_final_state_codegen
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_h_path} ${nodes_h_path}
VERBATIM
)

set(tmp_python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h")
set(python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/eager_final_state_op_function_impl.h")
add_custom_target(eager_final_state_python_c_codegen
COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py"
"--api_yaml_path=${api_yaml_path}"
"--output_path=${tmp_python_c_output_path}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_python_c_output_path} ${python_c_output_path}
VERBATIM
)
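The new `eager_final_state_python_c_codegen` target follows the same pattern as the node codegen above it: generate into a `tmp_` header, then `cmake -E copy_if_different` it over the real header, so an unchanged generated file keeps its timestamp and does not trigger downstream recompiles. A hedged Python sketch of that copy step (function name is illustrative; the real tool is CMake's):

```python
import filecmp
import os
import shutil

def copy_if_different(tmp_path, out_path):
    """Mimic `cmake -E copy_if_different`: overwrite out_path only when
    the freshly generated tmp file actually differs, so identical output
    leaves out_path (and its mtime) untouched. Illustrative sketch."""
    if os.path.exists(out_path) and filecmp.cmp(tmp_path, out_path, shallow=False):
        return False       # identical contents: skip the copy
    shutil.copyfile(tmp_path, out_path)
    return True            # contents differed (or out_path was new)
```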