Commit 5a325f5 (merge)

2742195759 committed Mar 4, 2024
2 parents ee7feaf + c72c0d6
Showing 759 changed files with 14,774 additions and 7,495 deletions.
2 changes: 1 addition & 1 deletion cmake/cuda.cmake
@@ -294,7 +294,7 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA NVCC_ARCH_BIN)
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
 message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

-# Set C++14 support
+# Set C++17 support
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
46 changes: 35 additions & 11 deletions cmake/phi_header.cmake → cmake/export_paddle_header.cmake
@@ -15,33 +15,57 @@
 set(PADDLE_INFERENCE_INSTALL_DIR
     "${CMAKE_BINARY_DIR}/paddle_inference_install_dir")

-function(phi_header_path_compat TARGET_PATH)
-  message(STATUS "phi header path compat processing: ${TARGET_PATH}")
+function(header_path_compat TARGET_PATH)
+  message(STATUS "header path compat processing: ${TARGET_PATH}")
   file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
   foreach(header ${HEADERS})
     if(${header} MATCHES ".*.h$")
       file(READ ${header} HEADER_CONTENT)
       string(REPLACE "paddle/fluid/platform/" "paddle/phi/" HEADER_CONTENT
                      "${HEADER_CONTENT}")
+      string(REPLACE "paddle/pir/include/" "paddle/pir/" HEADER_CONTENT
+             "${HEADER_CONTENT}")
+      string(REPLACE "paddle/fluid/pir/drr/include/" "paddle/pir/drr/"
+             HEADER_CONTENT "${HEADER_CONTENT}")
+      string(REPLACE "paddle/fluid/pir/transforms/" "paddle/pir/transforms/"
+             HEADER_CONTENT "${HEADER_CONTENT}")
       file(WRITE ${header} "${HEADER_CONTENT}")
-      message(STATUS "phi header path compat processing complete: ${header}")
+      message(STATUS "header path compat processing complete: ${header}")
     endif()
   endforeach()
 endfunction()

-phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle)
-phi_header_path_compat(
-  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi)
-phi_header_path_compat(
+header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle)
+header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi)
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/ext)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/include)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/parser)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/control_flow/ir
+)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/ir)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/utils)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/drr)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pass)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pattern_rewrite)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/transforms)

 # NOTE(liuyuanle): In inference lib, no need include paddle/utils/pybind.h, so we delete this.
 file(READ ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/extension.h
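For context, the renamed header_path_compat function rewrites include paths inside the headers shipped in the inference package so they match its flattened directory layout. A before/after sketch for one include line; the header names here are illustrative, and the rewrite rules are exactly the string(REPLACE ...) calls above:

// Illustrative only: the effect of header_path_compat on an installed header.
//
//   before:  #include "paddle/pir/include/core/builtin_type.h"
//   after:   #include "paddle/pir/core/builtin_type.h"
//
//   before:  #include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
//   after:   #include "paddle/pir/drr/drr_pattern_base.h"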
2 changes: 1 addition & 1 deletion cmake/external/pslib.cmake
@@ -69,7 +69,7 @@ ExternalProject_Add(
                   -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
   CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-  BUILD_BYPRODUCTS ${PSLIB_LIB})
+  BUILD_BYPRODUCTS ${PSLIB_LIB} ${JVM_LIB})

 add_library(pslib SHARED IMPORTED GLOBAL)
 set_property(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
48 changes: 45 additions & 3 deletions cmake/inference_lib.cmake
@@ -354,12 +354,54 @@ copy(
   SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/)

-# the include path of phi needs to be changed to adapt to inference api path
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/core/parser/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/parser/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/core/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/control_flow/ir/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/control_flow/ir/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/shape/ir/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/ir/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/shape/utils/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/utils/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/pass/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pass/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/pattern_rewrite/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pattern_rewrite/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/drr/include/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/drr/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/transforms/transform_general_functions.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/transforms/)
+
+# the include path of paddle needs to be changed to adapt to inference api path
 add_custom_command(
   TARGET inference_lib_dist
   POST_BUILD
-  COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/phi_header.cmake"
-  COMMENT "Change phi header include path to adapt to inference api path")
+  COMMAND ${CMAKE_COMMAND} -P
+          "${PADDLE_SOURCE_DIR}/cmake/export_paddle_header.cmake"
+  COMMENT "Change paddle header include path to adapt to inference api path")

 # CAPI inference library for only inference
 set(PADDLE_INFERENCE_C_INSTALL_DIR
109 changes: 51 additions & 58 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -97,44 +97,11 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   VLOG(4) << "FLAGS_group_schedule_tiling_first = "
           << FLAGS_group_schedule_tiling_first;
   std::vector<Var> axis_vars = cinn::common::GenDefaultAxis(axis_len);
-  const std::vector<ir::Var>& reduce_axes_vars = tensor->reduce_axis;
-  const auto reduce_axis_position = [&reduce_axes_vars, &tensor]() {
-    VLOG(4) << "start calculus reduce_axis_position: ";
-    std::vector<int> res;
-    const auto& fn_body =
-        tensor->operation.ptr()->as<ir::ComputeOp>()->body[0];
-    bool is_a_valid_reduce_op = fn_body.defined() && fn_body.As<ir::Reduce>();
-    if (!is_a_valid_reduce_op) {
-      PD_THROW(
-          "The reduce body is not a valid reduce op, please check the "
-          "input.");
-    }
-    const auto& reduce_body =
-        fn_body.As<ir::Reduce>()->body;  // reduce body is a tensor store.
-    const auto& load_indices = reduce_body.As<ir::Load>()->indices;
-    int position = -1;
-    for (const auto& obj : load_indices) {
-      position += 1;
-      for (auto& reduce_var : reduce_axes_vars) {
-        if (obj.as_var_ref() == reduce_var) {
-          res.push_back(position);
-        }
-      }
-    }
-    VLOG(4) << "reduce axis position is " << [&] {
-      std::stringstream ss;
-      for (int i : res) {
-        ss << i << " ";
-      }
-      return ss.str();
-    }();
-    return res;
-  }();
+  const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
+  VLOG(4) << "ast gen: tensor init_body is " << init_body;
   for (int i = 0; i < shape.size(); ++i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       // if tiling first, we need to replace the reduce axis with 0, but don't
       // deal with the non-reduce axis
       optim::ReplaceVarWithExpr(&init_body, axis[i], Expr(0));
@@ -157,6 +124,8 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
       iter_values.push_back(axis_vars[i]);
     }
   }
+  VLOG(4) << "iter_value.size() and block_vars.size() is "
+          << iter_values.size() << " " << block_vars.size();
   init_body = ir::ScheduleBlockRealize::Make(
       iter_values,
       ir::ScheduleBlock::Make(
@@ -165,17 +134,18 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   // For the remaining reduce axis, make reduce body
   ir::Expr reduce_body =
       ConvertReduceBody(tensor->body(), tensor, axis_exprs);
+
+  VLOG(4) << "ast gen: reduce body is " << reduce_body;
+
   // create schedule block itervars, i0,i1...
   std::vector<ir::Var> reduce_block_vars;
   std::vector<ir::Expr> reduce_iter_values;
   // reduce body and reduce init schedule block should have different objects
   // for same axis so we re-create objects
-  std::vector<Var> reduce_axis_vars = cinn::common::GenDefaultAxis(axis_len);
+  std::vector<Var> reduce_axes_vars = cinn::common::GenDefaultAxis(axis_len);
   for (int i = 0; i < shape.size(); ++i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       // if tiling first, we need to replace the reduce axis with 0, but don't
       // deal with the non-reduce axis
       optim::ReplaceVarWithExpr(&reduce_body, axis[i], Expr(0));
@@ -190,15 +160,16 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
             shape[i],
             cinn::UniqName("i" + std::to_string(i)),
             /*is_reduce = */ false));
-    reduce_axis_vars[i]->is_reduce_axis = false;
+    reduce_axes_vars[i]->is_reduce_axis = false;
     if (!FLAGS_group_schedule_tiling_first && shape[i] == Expr(1)) {
       reduce_iter_values.push_back(Expr(0));
     } else {
       reduce_iter_values.push_back(axis_vars[i]);
    }
   }
-  for (int i = 0; i < reduce_axes_vars.size(); ++i) {
-    int count = shape.size() + i;
+  VLOG(4) << "ast gen: reduce body is after replace 0" << reduce_body;
+  for (int i = 0; i < reduce_axis.size(); ++i) {
+    size_t count = shape.size() + i;
     reduce_block_vars.push_back(
         Var(reduce_axes_vars[i]->lower_bound,
             reduce_axes_vars[i]->upper_bound,
@@ -210,19 +181,43 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   }

   int non_zero_axis_size = 0;
-  for (int i = 0; i < axis.size(); ++i) {
-    if (!FLAGS_group_schedule_tiling_first &&
-        FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
-      continue;
-    }
-    optim::ReplaceVarWithExpr(
-        &reduce_body, axis[i], reduce_block_vars[non_zero_axis_size]);
-    ++non_zero_axis_size;
-  }
+  if (FLAGS_group_schedule_tiling_first) {
+    std::vector<ir::Var> non_reduce_axes_vars = [&]() {
+      std::vector<ir::Var> res;
+      for (int i = 0; i < shape.size(); ++i) {
+        bool is_keep_dim = axis[i]->is_keepdim;
+        if (!is_keep_dim) {
+          res.push_back(axis[i]);
+        }
+      }
+      return res;
+    }();
+    for (int i = 0; i < non_reduce_axes_vars.size(); ++i) {
+      optim::ReplaceVarWithExpr(
+          &reduce_body, non_reduce_axes_vars[i], reduce_block_vars[i]);
+      ++non_zero_axis_size;
+    }
+  } else {
+    for (int i = 0; i < axis.size(); ++i) {
+      if (!FLAGS_group_schedule_tiling_first &&
+          FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
+        continue;
+      }
+      optim::ReplaceVarWithExpr(
+          &reduce_body, axis[i], reduce_block_vars[non_zero_axis_size]);
+      ++non_zero_axis_size;
+    }
+  }

   if (FLAGS_group_schedule_tiling_first) {
     non_zero_axis_size = axis.size() - reduce_axes_vars.size();
+    VLOG(4) << "to replace : " << non_zero_axis_size << " "
+            << reduce_block_vars.size();
+    for (auto i = 0; i < reduce_block_vars.size(); i++) {
+      VLOG(4) << "reduce_block_vars[" << i << "] = " << reduce_block_vars[i];
+    }
+    for (auto i = 0; i < reduce_axis.size(); i++) {
+      VLOG(4) << "reduce_axis[" << i << "] = " << reduce_axis[i];
+    }
+    VLOG(4) << "before replace body: " << reduce_body;
     for (int i = non_zero_axis_size; i < reduce_block_vars.size(); ++i) {
       optim::ReplaceVarWithExpr(&reduce_body,
                                 reduce_axes_vars[i - non_zero_axis_size],
@@ -245,10 +240,8 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   // Put the two parts together
   ir::Expr body = ir::Block::Make({init_body, reduce_body});
   for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       continue;
     }
     if (!FLAGS_group_schedule_tiling_first && !FLAGS_cinn_bucket_compile &&
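The substance of this change: AstGen::Build previously recovered the reduce-axis positions by pattern-matching the Load indices inside the reduce body (the deleted reduce_axis_position lambda), and now it simply reads a per-axis is_keepdim flag. A minimal standalone sketch of the new selection rule, using stand-in types rather than CINN's real ir::Var and gflags:

#include <cstddef>
#include <cstdio>
#include <vector>

// Stand-in for CINN's ir::Var; only the flag this change cares about.
struct Axis {
  bool is_keepdim;
};

int main() {
  const bool FLAGS_group_schedule_tiling_first = true;  // stand-in for the gflag
  std::vector<Axis> axis = {{false}, {true}, {false}};
  for (std::size_t i = 0; i < axis.size(); ++i) {
    bool is_keep_dim = axis[i].is_keepdim;
    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
      // in AstGen::Build this axis is replaced with Expr(0) and skipped
      std::printf("axis %zu: keep-dim, replaced with 0\n", i);
      continue;
    }
    std::printf("axis %zu: becomes a loop/block var\n", i);
  }
  return 0;
}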
6 changes: 3 additions & 3 deletions paddle/cinn/backends/codegen_c_test.cc
@@ -61,9 +61,9 @@ TEST(CodeGenC, module) {
   LOG(INFO) << "C.body: " << C->get_compute_op()->body.front();

   Target target;
-  target.arch = Target::Arch ::X86;
-  target.bits = Target::Bit ::k32;
-  target.os = Target::OS ::Linux;
+  target.arch = Target::Arch::X86;
+  target.bits = Target::Bit::k32;
+  target.os = Target::OS::Linux;
   Module::Builder builder("module1", target);

   ast_gen_ius::TensorGroup tensor_group({A, B, C});
5 changes: 5 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/attribute_storage.h
@@ -71,6 +71,11 @@ struct GroupInfoAttributeStorage : public pir::AttributeStorage {
   static std::size_t HashValue(const ParamKey& key) {
     size_t hash_value = std::hash<std::string>{}(key.group_id);

+    for (auto op : key.ops) {
+      hash_value =
+          pir::detail::hash_combine(hash_value, std::hash<void*>()(op));
+    }
+
     for (auto d : key.loop_ranges) {
       hash_value =
           pir::detail::hash_combine(hash_value, std::hash<int64_t>()(d));
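The new loop folds each operation pointer in key.ops into the attribute hash, so two groups with the same group_id but different op lists hash differently. pir::detail::hash_combine itself is not shown in this diff; the sketch below assumes the usual boost-style mixer, which is what such helpers commonly are:

#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Assumed shape of pir::detail::hash_combine (boost-style mixer); the real
// implementation is not part of this diff.
inline std::size_t hash_combine(std::size_t seed, std::size_t value) {
  return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

int main() {
  int a = 0, b = 0;
  std::vector<void*> ops = {&a, &b};  // stand-ins for pir::Operation*
  std::size_t hash_value = std::hash<std::string>{}("group_0");
  for (void* op : ops) {
    hash_value = hash_combine(hash_value, std::hash<void*>()(op));
  }
  std::printf("hash = %zu\n", hash_value);
  return 0;
}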
15 changes: 15 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
@@ -24,6 +24,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
+#include "paddle/fluid/pir/transforms/shape_optimization_pass.h"
 #include "paddle/pir/include/core/builtin_type.h"
 #include "paddle/pir/include/core/op_base.h"
 #include "paddle/pir/include/dialect/control_flow/ir/cf_op.h"
@@ -104,6 +105,20 @@ void GroupOp::Print(pir::IrPrinter& printer) {
   os << " \n }";
 }

+bool GroupOp::InferSymbolicShape(
+    ::pir::ShapeConstraintIRAnalysis* shape_analysis) {
+  ::pir::InferSymExprForBlock(*block(), shape_analysis);
+
+  for (uint32_t rst_idx = 0; rst_idx < num_results(); rst_idx++) {
+    auto inner_yield_value = block()->back().operand_source(rst_idx);
+    const auto& shape =
+        shape_analysis->GetShapeOrDataForValue(inner_yield_value);
+    shape_analysis->SetShapeOrDataForValue(result(rst_idx), shape);
+  }
+
+  return true;
+}
+
 void FusionOp::Build(pir::Builder& builder,
                      pir::OperationArgument& argument,
                      const std::vector<pir::Type>& output_types) {
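The forwarding pattern in GroupOp::InferSymbolicShape above: run symbolic shape inference over the group's inner block, then copy the shape recorded for the i-th operand of the trailing yield op onto the group op's i-th result, so consumers of the group op see the shapes computed inside it. A toy sketch of that bookkeeping, with plain containers standing in for pir values and the ShapeConstraintIRAnalysis API:

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy stand-in for a symbolic shape: just a list of dims.
using ShapeOrData = std::vector<long>;

int main() {
  // Shapes the analysis recorded for the inner yield's operands.
  std::vector<ShapeOrData> inner_yield_shapes = {{4, 8}, {16}};
  // Forward each one to the group op's corresponding result.
  std::vector<ShapeOrData> group_result_shapes(inner_yield_shapes.size());
  for (std::size_t rst_idx = 0; rst_idx < inner_yield_shapes.size(); ++rst_idx) {
    group_result_shapes[rst_idx] = inner_yield_shapes[rst_idx];
  }
  assert(group_result_shapes[0].size() == 2);
  std::printf("forwarded %zu result shapes\n", group_result_shapes.size());
  return 0;
}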
5 changes: 4 additions & 1 deletion paddle/cinn/hlir/dialect/operator/ir/manual_op.h
@@ -29,7 +29,8 @@
 namespace cinn {
 namespace dialect {

-class IR_API GroupOp : public pir::Op<GroupOp> {
+class IR_API GroupOp
+    : public pir::Op<GroupOp, paddle::dialect::InferSymbolicShapeInterface> {
  public:
  using Op::Op;
  static const char *name() { return "cinn_op.group"; }
@@ -51,6 +52,8 @@ class IR_API GroupOp : public pir::Op<GroupOp> {
   pir::Block *block();
   std::vector<pir::Operation *> GetOperators();

+  bool InferSymbolicShape(pir::ShapeConstraintIRAnalysis *shape_analysis);
+
   void VerifySig();
   void Print(pir::IrPrinter &printer);  // NOLINT
 };