Merge branch 'develop' into add_multigammaln_api
GreatV committed Sep 26, 2023
2 parents 85425ba + a6f1fbf commit bee08f6
Showing 881 changed files with 21,741 additions and 12,504 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -4,7 +4,7 @@ bugprone-argument-comment,
-bugprone-assert-side-effect,
-bugprone-bad-signal-to-kill-thread,
-bugprone-bool-pointer-implicit-conversion,
-bugprone-branch-clone,
bugprone-branch-clone,
bugprone-copy-constructor-init,
-bugprone-dangling-handle,
-bugprone-dynamic-static-initializers,
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -94,9 +94,9 @@ repos:
description: Check C++ code style using cpplint.py.
entry: bash ./tools/codestyle/cpplint_pre_commit.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
files: \.(cc|cxx|cpp|cu|h|hpp|hxx)$
args:
- --extensions=c,cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps
- --extensions=cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps
- --filter=-readability/fn_size,-build/include_what_you_use,-build/c++11,-whitespace/parens
- --quiet
# Exclude third-party libraries
2 changes: 1 addition & 1 deletion paddle/cinn/ast_gen_ius/tensor_group.cc
@@ -30,7 +30,7 @@ TensorGroup::TensorGroup(const std::vector<ir::Tensor>& tensors) {

for (auto& tensor : tensors) {
output_tensor_names_.insert(tensor->name);
std::set<ir::Expr> used_tensors = ir::CollectIRNodes(
std::set<ir::Expr> used_tensors = ir::ir_utils::CollectIRNodes(
tensor->body(), [](const Expr* x) { return x->as_tensor(); });
for (const Expr& x : used_tensors) {
const ir::Tensor to_dep = x.as_tensor_ref();
39 changes: 20 additions & 19 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.cc
@@ -41,7 +41,7 @@ std::vector<ir::Var> IndicesToVars(const std::vector<ir::Expr>& indices) {
for (const ir::Expr& e : indices) {
// Whether we have to convert other types, like const numbers to Var?
if (e.As<ir::_Var_>() != nullptr) {
ir::Expr copy_e = optim::IRCopy(e);
ir::Expr copy_e = ir::ir_utils::IRCopy(e);
ir::_Var_* var_ref = copy_e.As<ir::_Var_>();
result.emplace_back(ir::Var(var_ref));
}
@@ -54,29 +54,30 @@ void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block) {
return;
}

ir::CollectIRNodesWithoutTensor(sche_block->body, [&](const Expr* x) {
const ir::Load* load_expr = x->As<ir::Load>();
if (load_expr != nullptr) {
const ir::Tensor t = load_expr->tensor.as_tensor_ref();
sche_block->read_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
return false;
}
const ir::Store* store_expr = x->As<ir::Store>();
if (store_expr != nullptr) {
const ir::Tensor t = store_expr->tensor.as_tensor_ref();
sche_block->write_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
return false;
}
return false;
});
ir::ir_utils::CollectIRNodesWithoutTensor(
sche_block->body, [&](const Expr* x) {
const ir::Load* load_expr = x->As<ir::Load>();
if (load_expr != nullptr) {
const ir::Tensor t = load_expr->tensor.as_tensor_ref();
sche_block->read_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
return false;
}
const ir::Store* store_expr = x->As<ir::Store>();
if (store_expr != nullptr) {
const ir::Tensor t = store_expr->tensor.as_tensor_ref();
sche_block->write_buffers.emplace_back(
ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
return false;
}
return false;
});
}

bool ContainsNodeType(ir::Expr expr,
const std::unordered_set<ir::IrNodeTy>& node_types) {
std::set<ir::Expr> collection =
ir::CollectIRNodesWithoutTensor(expr, [&](const Expr* x) {
ir::ir_utils::CollectIRNodesWithoutTensor(expr, [&](const Expr* x) {
return node_types.find(x->node_type()) != node_types.end();
});
return !collection.empty();
2 changes: 1 addition & 1 deletion paddle/cinn/auto_schedule/cost_model/feature_extractor.cc
@@ -218,7 +218,7 @@ void FeatureExtractor::Visit(const For *x) {
}

void FeatureExtractor::Visit(const PolyFor *x) {
Expr copy = optim::IRCopy(Expr(x));
Expr copy = ir::ir_utils::IRCopy(Expr(x));
feature_.IntoLoopBlock();
optim::TransformPolyForToFor(&copy);
ir::For *loop = copy.As<For>();
@@ -56,7 +56,7 @@ ir::IRSchedule MakeIRSchedule(const std::vector<ir::LoweredFunc>& lowered_funcs,
const std::string& task_key) {
std::vector<Expr> exprs;
for (auto&& func : lowered_funcs) {
exprs.emplace_back(optim::IRCopy(func->body));
exprs.emplace_back(ir::ir_utils::IRCopy(func->body));
}
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
task_registry->Regist(task_key, ir::ModuleExpr(exprs));
@@ -31,7 +31,7 @@ bool IsSpatialLoop(const ir::For* for_node) {
const auto& loop_var = for_node->loop_var;
// collect cases where the loop_var used in one of reduce axis in underneath
// ScheduleBlock
auto used_for_reduce_axis = ir::CollectIRNodesWithoutTensor(
auto used_for_reduce_axis = ir::ir_utils::CollectIRNodesWithoutTensor(
for_node->body, [&loop_var](const Expr* x) {
const auto* block_realize = x->As<ir::ScheduleBlockRealize>();
if (!block_realize) return false;
@@ -46,7 +46,7 @@ bool IsSpatialLoop(const ir::For* for_node) {
const ir::Expr& binding = block_realize->iter_values[i];
if (iter_var->is_reduce_axis ||
iter_var->name.substr(0, 6) == "reduce") {
auto used_exprs = ir::CollectIRNodesWithoutTensor(
auto used_exprs = ir::ir_utils::CollectIRNodesWithoutTensor(
binding, [&loop_var](const Expr* x) {
const ir::_Var_* var = x->As<ir::_Var_>();
if (var &&
@@ -49,7 +49,7 @@ bool AutoInline::CanInlineIntoConsumer(const Expr& sche_block_realize_expr,
ir::Expr root = ir_sch->GetRootBlock(sche_block_realize_expr);

// Check the schedule block to be inlined is not a reduce tensor.
std::set<ir::Expr> find_store = ir::CollectIRNodesWithoutTensor(
std::set<ir::Expr> find_store = ir::ir_utils::CollectIRNodesWithoutTensor(
compute_body, [&](const Expr* x) { return x->As<ir::Store>(); });
if (find_store.size() != 1UL) {
return false;
@@ -76,17 +76,19 @@ }
}

// Check this schedule block is the only writer of the tensor.
find_store = ir::CollectIRNodesWithoutTensor(root, [&](const Expr* x) {
return x->As<ir::Store>() &&
(x->As<ir::Store>()->tensor).as_tensor_ref()->name == tensor->name;
});
find_store =
ir::ir_utils::CollectIRNodesWithoutTensor(root, [&](const Expr* x) {
return x->As<ir::Store>() &&
(x->As<ir::Store>()->tensor).as_tensor_ref()->name ==
tensor->name;
});
if (find_store.size() != 1UL) {
return false;
}
// Check there is no overlap between the buffers the schedule block reads and
// writes.
std::set<ir::Expr> find_load =
ir::CollectIRNodesWithoutTensor(compute_body, [&](const Expr* x) {
std::set<ir::Expr> find_load = ir::ir_utils::CollectIRNodesWithoutTensor(
compute_body, [&](const Expr* x) {
return x->As<ir::Load>() && x->As<ir::Load>()->tensor == tensor_expr;
});
if (!find_load.empty()) {
@@ -56,7 +56,7 @@ bool AutoUnroll::MeetCondition(const ir::ScheduleBlock* schedule_block) const {
return false;
};

auto find_target_exprs = ir::CollectIRNodesWithoutTensor(
auto find_target_exprs = ir::ir_utils::CollectIRNodesWithoutTensor(
schedule_block->body,
[&has_reduce_iter, &has_nonserial_loop](const Expr* x) {
return has_reduce_iter(x) || has_nonserial_loop(x);
7 changes: 3 additions & 4 deletions paddle/cinn/auto_schedule/search_space/search_state.cc
@@ -133,11 +133,10 @@ bool SearchStateEqual::operator()(const SearchState& lhs,
// compare exprs size firstly
if (lhs_exprs.size() != rhs_exprs.size()) return false;

// compare every expr one by one with ir::IrEqualVisitor
// compare every expr one by one with ir::ir_utils::IrEqualVisitor
for (int i = 0; i < lhs_exprs.size(); ++i) {
ir::IrEqualVisitor compartor(
/*allow_name_suffix_diff=*/true); // ignore suffix difference in name
if (!compartor.Compare(lhs_exprs[i], rhs_exprs[i])) return false;
if (!ir::ir_utils::IRCompare(lhs_exprs[i], rhs_exprs[i], true))
return false;
}
return true;
}
4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/search_space/search_state.h
@@ -70,8 +70,8 @@ struct SearchStateHash {
size_t operator()(const SearchState& s) const;
};

// SearchStateHash equal functor, use ir::IrEqualVisitor to compare their AST
// struct and fields
// SearchStateHash equal functor, use ir::ir_utils::IrEqualVisitor to compare
// their AST struct and fields
struct SearchStateEqual {
bool operator()(const SearchState& lhs, const SearchState& rhs) const;
};
@@ -134,7 +134,7 @@ std::vector<SearchState> EvolutionarySearch::GetTopKCandidatesFromDatabase(
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
for (auto&& record : records) {
ir::IRSchedule ir_sch(
optim::IRCopy(task_registry->Get(task_key)->module_expr),
ir::ir_utils::IRCopy(task_registry->Get(task_key)->module_expr),
utils::ForkRandomState(&rand_seed_));
ir::ScheduleDesc::ReplayWithProto(record.trace, &ir_sch);
results.emplace_back(SearchState(std::move(ir_sch), record.predicted_cost));
@@ -181,9 +181,9 @@ SearchState EvolutionarySearch::CrossOver(const SearchState& state1,

for (size_t i = 0; i < father_exprs.size(); ++i) {
if (utils::SampleUniformInt(0, 2, &rand_seed_) == 0) {
cross_over_exprs.push_back(optim::IRCopy(father_exprs[i]));
cross_over_exprs.push_back(ir::ir_utils::IRCopy(father_exprs[i]));
} else {
cross_over_exprs.push_back(optim::IRCopy(mother_exprs[i]));
cross_over_exprs.push_back(ir::ir_utils::IRCopy(mother_exprs[i]));
}
}
auto res = SearchState(ir::IRSchedule(ir::ModuleExpr(cross_over_exprs),
@@ -217,7 +217,7 @@ SearchState EvolutionarySearch::Mutate(
const auto& task_key = tune_task_.serialized_key;
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
ir::IRSchedule new_ir_sch(
optim::IRCopy(task_registry->Get(task_key)->module_expr),
ir::ir_utils::IRCopy(task_registry->Get(task_key)->module_expr),
utils::ForkRandomState(rand_seed));
new_trace.Replay(&new_ir_sch, true);
ApplyPostScheduleRules(&new_ir_sch, post_schedule_rules_);
4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/task/task_optimizer.cc
@@ -247,7 +247,7 @@ TaskOptimizer::Result TaskOptimizer::OptimizeByEvolution(
auto& optimized_funcs = result.functions;
auto& best_cost = result.cost;
// use initial lowered function as default result
optimized_funcs = optim::IRCopy(task_->lowered_funcs);
optimized_funcs = ir::ir_utils::IRCopy(task_->lowered_funcs);
if (options.num_measure_trials ==
0) { // no need to measure and simply return the best searched
std::vector<MeasureInput> measure_candidates;
@@ -347,7 +347,7 @@ std::vector<SearchState> TaskOptimizer::SearchOneRound(
CHECK_EQ(best_exprs.size(), task_->lowered_funcs.size())
<< "RuntimeError: Expr size is not equal to LoweredFunc size in "
"TaskOptimizer";
auto init_funcs = optim::IRCopy(task_->lowered_funcs);
auto init_funcs = ir::ir_utils::IRCopy(task_->lowered_funcs);
std::vector<ir::LoweredFunc> valid_funcs;
for (size_t j = 0; j < best_exprs.size(); ++j) {
auto updated_f =
2 changes: 1 addition & 1 deletion paddle/cinn/auto_schedule/task/task_registry.h
@@ -63,7 +63,7 @@ class InitialTaskRegistry : public Registry<InitialTaskInfo> {
std::lock_guard<std::mutex> guard(registering_mutex);
if (fmap_.count(task_key) == 0) {
InitialTaskInfo* task_info =
new InitialTaskInfo(task_key, optim::IRCopy(module_expr));
new InitialTaskInfo(task_key, ir::ir_utils::IRCopy(module_expr));
__REGISTER__(task_key, task_info);
}
}
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_c.cc
@@ -38,7 +38,7 @@ using cinn::common::float16;
const char *kCKeywordRestrict = "__restrict__";

void CodeGenC::Compile(const ir::Module &module, const Outputs &outputs) {
ir::IrVerify(Expr(module));
ir::ir_utils::IrVerify(Expr(module));

if (!outputs.c_header_name.empty()) {
auto source = Compile(module, OutputKind::CHeader);
4 changes: 2 additions & 2 deletions paddle/cinn/backends/codegen_cuda_dev.cc
@@ -56,7 +56,7 @@ std::string CodeGenCUDA_Dev::Compile(const ir::Module &module, bool for_nvrtc) {

void CodeGenCUDA_Dev::Compile(const ir::Module &module,
const Outputs &outputs) {
ir::IrVerify(Expr(module));
ir::ir_utils::IrVerify(Expr(module));

CodeGenC::inline_builtin_codes_ = false;
if (!outputs.c_header_name.empty()) {
@@ -90,7 +90,7 @@ std::vector<Expr> CodeGenCUDA_Dev::GenerateBufferAliasExprs(
temp_buffers.end());
// prepare temp buffer alias
std::vector<Expr> buffer_alias;
auto tensors = ir::CollectIRNodes(op->body, [&](const Expr *x) {
auto tensors = ir::ir_utils::CollectIRNodes(op->body, [&](const Expr *x) {
return x->as_tensor() && x->as_tensor()->buffer.defined() &&
temp_buffer_set.count(x->as_tensor()->buffer);
});
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_util.h
@@ -127,7 +127,7 @@ struct CollectHostFunctionVisitor : public ir::IRMutator<> {
}

Expr CreateDeviceFunctionGivenDeviceKernel(Expr expr) {
auto copied = optim::IRCopy(expr);
auto copied = ir::ir_utils::IRCopy(expr);
auto* lowered_func = copied.as_lowered_func();
lowered_func->name = GenDeviceKernelName(lowered_func->name);
return copied;
41 changes: 30 additions & 11 deletions paddle/cinn/backends/compiler.cc
@@ -45,7 +45,7 @@ using CompilationStatus = hlir::framework::CompilationStatus;
static constexpr int DebugLogMaxLen = 30000;

void CompilationInfoDumper::DumpLoweredFuncByGroupIndex(
const ir::LoweredFunc& lowered_func, const int gidx) {
const ir::LoweredFunc& lowered_func, const int gidx, const int device_id) {
if (FLAGS_cinn_dump_group_lowered_func.empty() ||
lowered_func.get() == nullptr) {
return;
@@ -54,34 +54,42 @@ void CompilationInfoDumper::DumpLoweredFuncByGroupIndex(
content << lowered_func;
Dump(FLAGS_cinn_dump_group_lowered_func,
gidx,
device_id,
"lowered_function.txt",
content.str());
}

void CompilationInfoDumper::DumpSourceCodeByGroupIndex(
const std::string& source_code, const int gidx) {
const std::string& source_code, const int gidx, const int device_id) {
if (FLAGS_cinn_dump_group_source_code.empty()) {
return;
}
Dump(FLAGS_cinn_dump_group_source_code, gidx, "source_code.cu", source_code);
Dump(FLAGS_cinn_dump_group_source_code,
gidx,
device_id,
"source_code.cu",
source_code);
}

void CompilationInfoDumper::DumpPtxCodeByGroupIndex(
const std::string& source_ptx, const int gidx) {
const std::string& source_ptx, const int gidx, const int device_id) {
if (FLAGS_cinn_dump_group_ptx.empty()) {
return;
}
Dump(FLAGS_cinn_dump_group_ptx, gidx, "source_ptx.ptx", source_ptx);
Dump(
FLAGS_cinn_dump_group_ptx, gidx, device_id, "source_ptx.ptx", source_ptx);
}

void CompilationInfoDumper::DumpInstructionByGroupIndex(
const std::unique_ptr<cinn::hlir::framework::Instruction>& instr,
const int gidx) {
const int gidx,
const int device_id) {
if (FLAGS_cinn_dump_group_instruction.empty() || instr.get() == nullptr) {
return;
}
Dump(FLAGS_cinn_dump_group_instruction,
gidx,
device_id,
"instruction.txt",
instr->DumpInstruction());
}
@@ -99,6 +107,7 @@ void CompilationInfoDumper::DumpLoweredFunc() {
}
Dump(FLAGS_cinn_dump_group_lowered_func,
idx,
device_id_,
"lowered_function.txt",
content.str());
}
@@ -115,7 +124,11 @@ void CompilationInfoDumper::DumpSourceCode() {
} else {
dump_str = "[No source code generated]\n\n" + info_.Message(idx);
}
Dump(FLAGS_cinn_dump_group_source_code, idx, "source_code.cu", dump_str);
Dump(FLAGS_cinn_dump_group_source_code,
idx,
device_id_,
"source_code.cu",
dump_str);
}
}

@@ -130,7 +143,8 @@ void CompilationInfoDumper::DumpPtxCode() {
} else {
dump_str = "[No source ptxs generated]\n\n" + info_.Message(idx);
}
Dump(FLAGS_cinn_dump_group_ptx, idx, "source_ptx.ptx", dump_str);
Dump(
FLAGS_cinn_dump_group_ptx, idx, device_id_, "source_ptx.ptx", dump_str);
}
}

@@ -145,16 +159,21 @@ void CompilationInfoDumper::DumpInstruction() {
} else {
dump_str = "[No instruction generated]\n\n" + info_.Message(idx);
}
Dump(FLAGS_cinn_dump_group_instruction, idx, "instruction.txt", dump_str);
Dump(FLAGS_cinn_dump_group_instruction,
idx,
device_id_,
"instruction.txt",
dump_str);
}
}

void CompilationInfoDumper::Dump(const std::string& base_path,
const int idx,
const int device_id,
const std::string& file_name,
const std::string& content) {
auto dump_path =
utils::StringFormat("%s/fusion_group_%d", base_path.c_str(), idx);
auto dump_path = utils::StringFormat(
"%s/device_%d/fusion_group_%d", base_path.c_str(), device_id, idx);
if (!hlir::framework::MakeDirectory(
dump_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
LOG(WARNING) << "Failed to make directory: \"" << dump_path