Commit 5a325f5 (merge)

2742195759 committed Mar 4, 2024
2 parents ee7feaf + c72c0d6
Showing 759 changed files with 14,774 additions and 7,495 deletions.
2 changes: 1 addition & 1 deletion cmake/cuda.cmake
@@ -294,7 +294,7 @@ select_nvcc_arch_flags(NVCC_FLAGS_EXTRA NVCC_ARCH_BIN)
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
 message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

-# Set C++14 support
+# Set C++17 support
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
46 changes: 35 additions & 11 deletions cmake/phi_header.cmake → cmake/export_paddle_header.cmake
@@ -15,33 +15,57 @@
 set(PADDLE_INFERENCE_INSTALL_DIR
     "${CMAKE_BINARY_DIR}/paddle_inference_install_dir")

-function(phi_header_path_compat TARGET_PATH)
-  message(STATUS "phi header path compat processing: ${TARGET_PATH}")
+function(header_path_compat TARGET_PATH)
+  message(STATUS "header path compat processing: ${TARGET_PATH}")
   file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
   foreach(header ${HEADERS})
     if(${header} MATCHES ".*.h$")
       file(READ ${header} HEADER_CONTENT)
       string(REPLACE "paddle/fluid/platform/" "paddle/phi/" HEADER_CONTENT
                      "${HEADER_CONTENT}")
+      string(REPLACE "paddle/pir/include/" "paddle/pir/" HEADER_CONTENT
+             "${HEADER_CONTENT}")
+      string(REPLACE "paddle/fluid/pir/drr/include/" "paddle/pir/drr/"
+             HEADER_CONTENT "${HEADER_CONTENT}")
+      string(REPLACE "paddle/fluid/pir/transforms/" "paddle/pir/transforms/"
+             HEADER_CONTENT "${HEADER_CONTENT}")
       file(WRITE ${header} "${HEADER_CONTENT}")
-      message(STATUS "phi header path compat processing complete: ${header}")
+      message(STATUS "header path compat processing complete: ${header}")
     endif()
   endforeach()
 endfunction()

-phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle)
-phi_header_path_compat(
-  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi)
-phi_header_path_compat(
+header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle)
+header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi)
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/ext)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/include)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common)
-phi_header_path_compat(
+header_path_compat(
   ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/parser)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/control_flow/ir
+)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/ir)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/utils)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/drr)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pass)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pattern_rewrite)
+header_path_compat(
+  ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/transforms)

 # NOTE(liuyuanle): In inference lib, no need include paddle/utils/pybind.h, so we delete this.
 file(READ ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/extension.h
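For context, the renamed header_path_compat function rewrites include paths inside the headers shipped in the inference package so they match its flattened directory layout. A before/after sketch for one include line; the header names here are illustrative, and the rewrite rules are exactly the string(REPLACE ...) calls above:

// Illustrative only: the effect of header_path_compat on an installed header.
//
//   before:  #include "paddle/pir/include/core/builtin_type.h"
//   after:   #include "paddle/pir/core/builtin_type.h"
//
//   before:  #include "paddle/fluid/pir/drr/include/drr_pattern_base.h"
//   after:   #include "paddle/pir/drr/drr_pattern_base.h"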
2 changes: 1 addition & 1 deletion cmake/external/pslib.cmake
@@ -69,7 +69,7 @@ ExternalProject_Add(
                   -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
   CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT}
                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-  BUILD_BYPRODUCTS ${PSLIB_LIB})
+  BUILD_BYPRODUCTS ${PSLIB_LIB} ${JVM_LIB})

 add_library(pslib SHARED IMPORTED GLOBAL)
 set_property(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB})
48 changes: 45 additions & 3 deletions cmake/inference_lib.cmake
@@ -354,12 +354,54 @@ copy(
   SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/)

-# the include path of phi needs to be changed to adapt to inference api path
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/core/parser/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/parser/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/core/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/core/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/control_flow/ir/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/control_flow/ir/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/shape/ir/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/ir/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/dialect/shape/utils/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/dialect/shape/utils/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/pass/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pass/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pir/include/pattern_rewrite/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/pattern_rewrite/
+)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/drr/include/*.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/drr/)
+copy(
+  inference_lib_dist
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/pir/transforms/transform_general_functions.h
+  DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/pir/transforms/)
+
+# the include path of paddle needs to be changed to adapt to inference api path
 add_custom_command(
   TARGET inference_lib_dist
   POST_BUILD
-  COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/phi_header.cmake"
-  COMMENT "Change phi header include path to adapt to inference api path")
+  COMMAND ${CMAKE_COMMAND} -P
+          "${PADDLE_SOURCE_DIR}/cmake/export_paddle_header.cmake"
+  COMMENT "Change paddle header include path to adapt to inference api path")

 # CAPI inference library for only inference
 set(PADDLE_INFERENCE_C_INSTALL_DIR
109 changes: 51 additions & 58 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -97,44 +97,11 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   VLOG(4) << "FLAGS_group_schedule_tiling_first = "
           << FLAGS_group_schedule_tiling_first;
   std::vector<Var> axis_vars = cinn::common::GenDefaultAxis(axis_len);
-  const std::vector<ir::Var>& reduce_axes_vars = tensor->reduce_axis;
-  const auto reduce_axis_position = [&reduce_axes_vars, &tensor]() {
-    VLOG(4) << "start calculus reduce_axis_position: ";
-    std::vector<int> res;
-    const auto& fn_body =
-        tensor->operation.ptr()->as<ir::ComputeOp>()->body[0];
-    bool is_a_valid_reduce_op = fn_body.defined() && fn_body.As<ir::Reduce>();
-    if (!is_a_valid_reduce_op) {
-      PD_THROW(
-          "The reduce body is not a valid reduce op, please check the "
-          "input.");
-    }
-    const auto& reduce_body =
-        fn_body.As<ir::Reduce>()->body;  // reduce body is a tensor store.
-    const auto& load_indices = reduce_body.As<ir::Load>()->indices;
-    int position = -1;
-    for (const auto& obj : load_indices) {
-      position += 1;
-      for (auto& reduce_var : reduce_axes_vars) {
-        if (obj.as_var_ref() == reduce_var) {
-          res.push_back(position);
-        }
-      }
-    }
-    VLOG(4) << "reduce axis position is " << [&] {
-      std::stringstream ss;
-      for (int i : res) {
-        ss << i << " ";
-      }
-      return ss.str();
-    }();
-    return res;
-  }();
+  const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
+  VLOG(4) << "ast gen: tensor init_body is " << init_body;
   for (int i = 0; i < shape.size(); ++i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       // if tiling first, we need to replace the reduce axis with 0, but don't
       // deal with the non-reduce axis
       optim::ReplaceVarWithExpr(&init_body, axis[i], Expr(0));
@@ -157,6 +124,8 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
       iter_values.push_back(axis_vars[i]);
     }
   }
+  VLOG(4) << "iter_value.size() and block_vars.size() is "
+          << iter_values.size() << " " << block_vars.size();
   init_body = ir::ScheduleBlockRealize::Make(
       iter_values,
       ir::ScheduleBlock::Make(
@@ -165,17 +134,18 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   // For the remaining reduce axis, make reduce body
   ir::Expr reduce_body =
       ConvertReduceBody(tensor->body(), tensor, axis_exprs);
+
+  VLOG(4) << "ast gen: reduce body is " << reduce_body;
+
   // create schedule block itervars, i0,i1...
   std::vector<ir::Var> reduce_block_vars;
   std::vector<ir::Expr> reduce_iter_values;
   // reduce body and reduce init schedule block should have different objects
   // for same axis so we re-create objects
-  std::vector<Var> reduce_axis_vars = cinn::common::GenDefaultAxis(axis_len);
+  std::vector<Var> reduce_axes_vars = cinn::common::GenDefaultAxis(axis_len);
   for (int i = 0; i < shape.size(); ++i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       // if tiling first, we need to replace the reduce axis with 0, but don't
       // deal with the non-reduce axis
       optim::ReplaceVarWithExpr(&reduce_body, axis[i], Expr(0));
@@ -190,15 +160,16 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
             shape[i],
             cinn::UniqName("i" + std::to_string(i)),
             /*is_reduce = */ false));
-    reduce_axis_vars[i]->is_reduce_axis = false;
+    reduce_axes_vars[i]->is_reduce_axis = false;
     if (!FLAGS_group_schedule_tiling_first && shape[i] == Expr(1)) {
       reduce_iter_values.push_back(Expr(0));
     } else {
       reduce_iter_values.push_back(axis_vars[i]);
    }
   }
-  for (int i = 0; i < reduce_axes_vars.size(); ++i) {
-    int count = shape.size() + i;
+  VLOG(4) << "ast gen: reduce body is after replace 0" << reduce_body;
+  for (int i = 0; i < reduce_axis.size(); ++i) {
+    size_t count = shape.size() + i;
     reduce_block_vars.push_back(
         Var(reduce_axes_vars[i]->lower_bound,
             reduce_axes_vars[i]->upper_bound,
@@ -210,19 +181,43 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   }

   int non_zero_axis_size = 0;
-  for (int i = 0; i < axis.size(); ++i) {
-    if (!FLAGS_group_schedule_tiling_first &&
-        FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
-      continue;
-    }
-    optim::ReplaceVarWithExpr(
-        &reduce_body, axis[i], reduce_block_vars[non_zero_axis_size]);
-    ++non_zero_axis_size;
-  }
+  if (FLAGS_group_schedule_tiling_first) {
+    std::vector<ir::Var> non_reduce_axes_vars = [&]() {
+      std::vector<ir::Var> res;
+      for (int i = 0; i < shape.size(); ++i) {
+        bool is_keep_dim = axis[i]->is_keepdim;
+        if (!is_keep_dim) {
+          res.push_back(axis[i]);
+        }
+      }
+      return res;
+    }();
+    for (int i = 0; i < non_reduce_axes_vars.size(); ++i) {
+      optim::ReplaceVarWithExpr(
+          &reduce_body, non_reduce_axes_vars[i], reduce_block_vars[i]);
+      ++non_zero_axis_size;
+    }
+  } else {
+    for (int i = 0; i < axis.size(); ++i) {
+      if (!FLAGS_group_schedule_tiling_first &&
+          FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
+        continue;
+      }
+      optim::ReplaceVarWithExpr(
+          &reduce_body, axis[i], reduce_block_vars[non_zero_axis_size]);
+      ++non_zero_axis_size;
+    }
+  }

   if (FLAGS_group_schedule_tiling_first) {
     non_zero_axis_size = axis.size() - reduce_axes_vars.size();
+    VLOG(4) << "to replace : " << non_zero_axis_size << " "
+            << reduce_block_vars.size();
+    for (auto i = 0; i < reduce_block_vars.size(); i++) {
+      VLOG(4) << "reduce_block_vars[" << i << "] = " << reduce_block_vars[i];
+    }
+    for (auto i = 0; i < reduce_axis.size(); i++) {
+      VLOG(4) << "reduce_axis[" << i << "] = " << reduce_axis[i];
+    }
+    VLOG(4) << "before replace body: " << reduce_body;
     for (int i = non_zero_axis_size; i < reduce_block_vars.size(); ++i) {
       optim::ReplaceVarWithExpr(&reduce_body,
                                 reduce_axes_vars[i - non_zero_axis_size],
@@ -245,10 +240,8 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
   // Put the two parts together
   ir::Expr body = ir::Block::Make({init_body, reduce_body});
   for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
-    bool reduce_axis_found = std::find(reduce_axis_position.begin(),
-                                       reduce_axis_position.end(),
-                                       i) != reduce_axis_position.end();
-    if (FLAGS_group_schedule_tiling_first && reduce_axis_found) {
+    bool is_keep_dim = axis[i]->is_keepdim;
+    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
       continue;
     }
     if (!FLAGS_group_schedule_tiling_first && !FLAGS_cinn_bucket_compile &&
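The substance of this change: AstGen::Build previously recovered the reduce-axis positions by pattern-matching the Load indices inside the reduce body (the deleted reduce_axis_position lambda), and now it simply reads a per-axis is_keepdim flag. A minimal standalone sketch of the new selection rule, using stand-in types rather than CINN's real ir::Var and gflags:

#include <cstddef>
#include <cstdio>
#include <vector>

// Stand-in for CINN's ir::Var; only the flag this change cares about.
struct Axis {
  bool is_keepdim;
};

int main() {
  const bool FLAGS_group_schedule_tiling_first = true;  // stand-in for the gflag
  std::vector<Axis> axis = {{false}, {true}, {false}};
  for (std::size_t i = 0; i < axis.size(); ++i) {
    bool is_keep_dim = axis[i].is_keepdim;
    if (FLAGS_group_schedule_tiling_first && is_keep_dim) {
      // in AstGen::Build this axis is replaced with Expr(0) and skipped
      std::printf("axis %zu: keep-dim, replaced with 0\n", i);
      continue;
    }
    std::printf("axis %zu: becomes a loop/block var\n", i);
  }
  return 0;
}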
6 changes: 3 additions & 3 deletions paddle/cinn/backends/codegen_c_test.cc
@@ -61,9 +61,9 @@ TEST(CodeGenC, module) {
   LOG(INFO) << "C.body: " << C->get_compute_op()->body.front();

   Target target;
-  target.arch = Target::Arch ::X86;
-  target.bits = Target::Bit ::k32;
-  target.os = Target::OS ::Linux;
+  target.arch = Target::Arch::X86;
+  target.bits = Target::Bit::k32;
+  target.os = Target::OS::Linux;
   Module::Builder builder("module1", target);

   ast_gen_ius::TensorGroup tensor_group({A, B, C});
5 changes: 5 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/attribute_storage.h
@@ -71,6 +71,11 @@ struct GroupInfoAttributeStorage : public pir::AttributeStorage {
   static std::size_t HashValue(const ParamKey& key) {
     size_t hash_value = std::hash<std::string>{}(key.group_id);

+    for (auto op : key.ops) {
+      hash_value =
+          pir::detail::hash_combine(hash_value, std::hash<void*>()(op));
+    }
+
     for (auto d : key.loop_ranges) {
       hash_value =
           pir::detail::hash_combine(hash_value, std::hash<int64_t>()(d));
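The new loop folds each operation pointer in key.ops into the attribute hash, so two groups with the same group_id but different op lists hash differently. pir::detail::hash_combine itself is not shown in this diff; the sketch below assumes the usual boost-style mixer, which is what such helpers commonly are:

#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Assumed shape of pir::detail::hash_combine (boost-style mixer); the real
// implementation is not part of this diff.
inline std::size_t hash_combine(std::size_t seed, std::size_t value) {
  return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

int main() {
  int a = 0, b = 0;
  std::vector<void*> ops = {&a, &b};  // stand-ins for pir::Operation*
  std::size_t hash_value = std::hash<std::string>{}("group_0");
  for (void* op : ops) {
    hash_value = hash_combine(hash_value, std::hash<void*>()(op));
  }
  std::printf("hash = %zu\n", hash_value);
  return 0;
}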
15 changes: 15 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
@@ -24,6 +24,7 @@
 #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/utils/utils.h"
+#include "paddle/fluid/pir/transforms/shape_optimization_pass.h"
 #include "paddle/pir/include/core/builtin_type.h"
 #include "paddle/pir/include/core/op_base.h"
 #include "paddle/pir/include/dialect/control_flow/ir/cf_op.h"
@@ -104,6 +105,20 @@ void GroupOp::Print(pir::IrPrinter& printer) {
   os << " \n }";
 }

+bool GroupOp::InferSymbolicShape(
+    ::pir::ShapeConstraintIRAnalysis* shape_analysis) {
+  ::pir::InferSymExprForBlock(*block(), shape_analysis);
+
+  for (uint32_t rst_idx = 0; rst_idx < num_results(); rst_idx++) {
+    auto inner_yield_value = block()->back().operand_source(rst_idx);
+    const auto& shape =
+        shape_analysis->GetShapeOrDataForValue(inner_yield_value);
+    shape_analysis->SetShapeOrDataForValue(result(rst_idx), shape);
+  }
+
+  return true;
+}
+
 void FusionOp::Build(pir::Builder& builder,
                      pir::OperationArgument& argument,
                      const std::vector<pir::Type>& output_types) {
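The forwarding pattern in GroupOp::InferSymbolicShape above: run symbolic shape inference over the group's inner block, then copy the shape recorded for the i-th operand of the trailing yield op onto the group op's i-th result, so consumers of the group op see the shapes computed inside it. A toy sketch of that bookkeeping, with plain containers standing in for pir values and the ShapeConstraintIRAnalysis API:

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy stand-in for a symbolic shape: just a list of dims.
using ShapeOrData = std::vector<long>;

int main() {
  // Shapes the analysis recorded for the inner yield's operands.
  std::vector<ShapeOrData> inner_yield_shapes = {{4, 8}, {16}};
  // Forward each one to the group op's corresponding result.
  std::vector<ShapeOrData> group_result_shapes(inner_yield_shapes.size());
  for (std::size_t rst_idx = 0; rst_idx < inner_yield_shapes.size(); ++rst_idx) {
    group_result_shapes[rst_idx] = inner_yield_shapes[rst_idx];
  }
  assert(group_result_shapes[0].size() == 2);
  std::printf("forwarded %zu result shapes\n", group_result_shapes.size());
  return 0;
}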
5 changes: 4 additions & 1 deletion paddle/cinn/hlir/dialect/operator/ir/manual_op.h
@@ -29,7 +29,8 @@
 namespace cinn {
 namespace dialect {

-class IR_API GroupOp : public pir::Op<GroupOp> {
+class IR_API GroupOp
+    : public pir::Op<GroupOp, paddle::dialect::InferSymbolicShapeInterface> {
  public:
  using Op::Op;
  static const char *name() { return "cinn_op.group"; }
@@ -51,6 +52,8 @@ class IR_API GroupOp : public pir::Op<GroupOp> {
   pir::Block *block();
   std::vector<pir::Operation *> GetOperators();

+  bool InferSymbolicShape(pir::ShapeConstraintIRAnalysis *shape_analysis);
+
   void VerifySig();
   void Print(pir::IrPrinter &printer);  // NOLINT
 };