Commit 09ac2c1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into nested-namespace-part-6

walkalone20 committed Jun 4, 2024
2 parents 7deb15f + 4cb694e commit 09ac2c1
Showing 751 changed files with 12,989 additions and 8,102 deletions.
8 changes: 4 additions & 4 deletions .clang-format
@@ -6,11 +6,11 @@
# The basic usage is,
# clang-format -i -style=file PATH/TO/SOURCE/CODE
#
-# The -style=file implicit use ".clang-format" file located in one of
-# parent directory.
+# The -style=file implicit use ".clang-format" file located in one of
+# parent directory.
# The -i means inplace change.
#
-# The document of clang-format is
+# The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
@@ -20,7 +20,7 @@ IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -1 # The private/protected/public has no indent in class
-Standard: Cpp11
+Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false
BinPackArguments: false
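To make the key options concrete, here is a sketch of how clang-format renders a declaration under the settings above (IndentWidth: 2, AccessModifierOffset: -1, BinPackParameters: false); the class is invented for illustration:

class Foo {
 public:                      // access specifier offset -1 from the 2-space indent
  void Bar(int first_param,   // BinPackParameters: false gives each
           int second_param,  // parameter its own line
           int third_param);
};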
1 change: 0 additions & 1 deletion .github/CODEOWNERS
@@ -53,7 +53,6 @@ python/paddle/base/compiler.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
python/paddle/base/dygraph/layers.py @JiabinYang @phlrain
python/paddle/base/framework.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
python/paddle/base/__init__.py @phlrain @Aurelius84 @qili93
-python/paddle/base/parallel_executor.py @Xreki @zhhsplendid @Aurelius84
python/paddle/base/tests/unittests/white_list/check_op_sequence_batch_1_input_white_list.py @Aurelius84 @phlrain
python/paddle/base/tests/unittests/white_list/check_op_sequence_instance_0_input_white_list.py @Aurelius84 @phlrain
python/paddle/base/tests/unittests/white_list/check_shape_white_list.py @hong19860320 @Aurelius84 @phlrain
7 changes: 5 additions & 2 deletions CMakeLists.txt
@@ -99,6 +99,9 @@ if(WITH_GPU AND WITH_ROCM)
endif()

if(WITH_GPU AND NOT APPLE)
+  if(WITH_PIP_CUDA_LIBRARIES AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
+    add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES)
+  endif()
#(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL
"x86_64")
@@ -107,8 +110,8 @@ if(WITH_GPU AND NOT APPLE)
CACHE BOOL "" FORCE)
set(CMAKE_CUDA_FLAGS "--cudart shared")
if(WITH_PIP_CUDA_LIBRARIES)
-      #(Note risemeup1): Flag 'WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA.
-      add_definitions(-DWITH_PIP_CUDA_LIBRARIES)
+      #(Note risemeup1): Flag 'PADDLE_WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA.
+      add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES)
endif()
endif()
enable_language(CUDA)
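Per the note in the hunk, the PADDLE_WITH_PIP_CUDA_LIBRARIES definition is consumed by dynamic_loader.cc when locating CUDA shared libraries installed through NVIDIA's pip packages. A minimal sketch of how such a guard can be used — the function name, wheel layout, and fallback path are illustrative assumptions, not Paddle's actual implementation:

#include <string>
#include <vector>

// Hypothetical: assemble the directories searched for libcudart.so and
// friends, preferring pip-installed wheels when the macro is defined.
std::vector<std::string> CudaLibrarySearchDirs() {
  std::vector<std::string> dirs;
#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES
  // Assumed layout of NVIDIA pip wheels (e.g. nvidia-cuda-runtime-cu12).
  dirs.push_back("site-packages/nvidia/cuda_runtime/lib");
#endif
  dirs.push_back("/usr/local/cuda/lib64");  // conventional system install
  return dirs;
}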
2 changes: 1 addition & 1 deletion cmake/PaddleConfig.cmake.in
@@ -12,7 +12,7 @@
get_filename_component(PADDLE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}/../.." ABSOLUTE)

# include directories
-set(PADDLE_INCLUDE_DIRS
+set(PADDLE_INCLUDE_DIRS
${PADDLE_INSTALL_PREFIX}/include
${PADDLE_INSTALL_PREFIX}/include/third_party
)
2 changes: 1 addition & 1 deletion cmake/make_resource.py
@@ -24,7 +24,7 @@
"const unsigned char "
+ var
+ "[] = {"
+ ",".join(["0x%02x" % ord(c) for c in open(res).read()])
+ ",".join([f"0x{ord(c):02x}" for c in open(res).read()])
+ ",0};\n"
+ "const unsigned "
+ var
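The hunk above only modernizes the string formatting ("0x%02x" % ord(c) becomes an f-string); the generated file is unchanged. As a sketch, for a resource file containing the two bytes 'h' and 'i' and a variable name kernel_res, the script would emit roughly the following C — the _size suffix on the second constant is an assumption, since that declaration is truncated in this view:

const unsigned char kernel_res[] = {0x68,0x69,0};
const unsigned kernel_res_size = 2; /* assumed name; emitted by the truncated lines */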
5 changes: 5 additions & 0 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -131,6 +131,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
} else {
iter_values.push_back(axis_vars[i]);
}
+    ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]});
}
VLOG(4) << "iter_value.size() and block_vars.size() is "
<< iter_values.size() << " " << block_vars.size();
@@ -167,6 +168,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
} else {
reduce_iter_values.push_back(axis_vars[i]);
}
+    ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]});
}
VLOG(4) << "ast gen: reduce body is after replace 0" << reduce_body;
for (int i = 0; i < reduce_axis.size(); ++i) {
@@ -227,6 +229,9 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
ir::ScheduleBlock::Make(
reduce_block_vars, {}, {}, tensor->name, reduce_body));
for (int i = static_cast<int>(reduce_axis.size()) - 1; i >= 0; --i) {
+    ir::TryElevateInt32ToInt64({reduce_axis[i],
+                                reduce_axis[i]->lower_bound,
+                                reduce_axis[i]->upper_bound});
reduce_body = ir::For::Make(reduce_axis[i],
reduce_axis[i]->lower_bound,
reduce_axis[i]->upper_bound,
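Each call above groups an axis variable with its extent (or a reduce axis with its bounds) so they end up sharing one integer type. Conceptually, the elevation promotes the whole group to int64 as soon as any member is int64, preventing mixed 32/64-bit loop bounds and indices. A simplified stand-alone sketch of that rule — the real ir::Expr type system is much richer than this:

#include <vector>

enum class DType { Int32, Int64 };
struct Expr { DType type = DType::Int32; };

// Promote every expression in the group to int64 if at least one member
// already is; otherwise leave the whole group in int32.
void TryElevateInt32ToInt64(const std::vector<Expr*>& group) {
  bool has_int64 = false;
  for (const Expr* e : group)
    if (e->type == DType::Int64) has_int64 = true;
  if (!has_int64) return;
  for (Expr* e : group) e->type = DType::Int64;
}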
18 changes: 13 additions & 5 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.cc
@@ -32,7 +32,7 @@
#include "paddle/cinn/lang/lower.h"
#include "paddle/cinn/optim/optimize.h"
#include "paddle/cinn/optim/transform_gpu_forloop.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -193,10 +193,14 @@ ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target,
std::unordered_set<std::string> GetReduceLoopVarNames(const ir::Expr block) {
const ir::ScheduleBlockRealize* block_realize =
block.As<ir::ScheduleBlockRealize>();
-  CHECK_NOTNULL(block_realize);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_realize,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize"));
const ir::ScheduleBlock* block_node =
block_realize->schedule_block.As<ir::ScheduleBlock>();
-  CHECK_NOTNULL(block_node);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_node,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlock"));
std::vector<ir::Expr> iter_values = block_realize->iter_values;
std::vector<ir::Var> iter_vars = block_node->iter_vars;

@@ -218,10 +222,14 @@ std::unordered_set<std::string> GetReduceLoopVarNames(const ir::Expr block) {
std::string GetBlockName(const ir::Expr block) {
const ir::ScheduleBlockRealize* block_realize =
block.As<ir::ScheduleBlockRealize>();
-  CHECK_NOTNULL(block_realize);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_realize,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize"));
const ir::ScheduleBlock* block_node =
block_realize->schedule_block.As<ir::ScheduleBlock>();
-  CHECK_NOTNULL(block_node);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_node,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlock"));
return block_node->name;
}

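This file shows the pattern repeated through the rest of the commit: glog-style CHECK_* assertions become PADDLE_ENFORCE_* calls carrying a typed phi::errors::* payload. The practical difference is that CHECK aborts the process outright, while PADDLE_ENFORCE raises a structured, catchable error with a formatted message. A self-contained mock of those semantics, simplified for illustration and not Paddle's actual macro:

#include <iostream>
#include <sstream>
#include <stdexcept>

// Stand-in for PADDLE_ENFORCE_GT: throw a descriptive exception on failure
// instead of aborting the way CHECK_GT does.
#define MOCK_ENFORCE_GT(a, b, msg)                          \
  do {                                                      \
    if (!((a) > (b))) {                                     \
      std::ostringstream os;                                \
      os << "InvalidArgument: " << (msg) << " (got " << (a) \
         << " <= " << (b) << ")";                           \
      throw std::invalid_argument(os.str());                \
    }                                                       \
  } while (0)

int main() {
  try {
    int num_tuning_rounds = 0;
    MOCK_ENFORCE_GT(num_tuning_rounds, 0,
                    "The num_tuning_rounds should be greater than 0.");
  } catch (const std::invalid_argument& e) {
    std::cerr << e.what() << '\n';  // the caller can recover here
  }
  return 0;
}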
9 changes: 5 additions & 4 deletions paddle/cinn/auto_schedule/auto_tuner.cc
@@ -34,7 +34,7 @@
#include "paddle/cinn/hlir/framework/op.h"
#include "paddle/cinn/hlir/framework/visualize_helper.h"
#include "paddle/cinn/utils/string.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -144,9 +144,10 @@ void PrintResult(const TuningResult& result) {
}

TuningResult AutoTuner::Tune(const TuningOptions& options) {
-  CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config";
VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds
<< ", tasks size=" << tasks_.size();
+  PADDLE_ENFORCE_GT(options.num_tuning_rounds,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The num_tuning_rounds should be greater than 0."));

TuningResult result;
result.subgraphs.resize(tasks_.size());
14 changes: 9 additions & 5 deletions paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc
@@ -24,7 +24,7 @@
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -45,8 +45,10 @@ void ExprCostModel::Train(const std::vector<const ir::ModuleExpr*>& samples,
const cinn::common::Target& target) {
trained_times_.store(1);
size_t total_size = samples.size();
-  CHECK_EQ(total_size, labels.size())
-      << "Samples must have same size as labels";
+  PADDLE_ENFORCE_EQ(
+      total_size,
+      labels.size(),
+      phi::errors::InvalidArgument("Samples must have same size as labels"));
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
Expand All @@ -63,8 +65,10 @@ void ExprCostModel::Update(const std::vector<const ir::ModuleExpr*>& samples,
const cinn::common::Target& target) {
++trained_times_;
size_t total_size = samples.size();
-  CHECK_EQ(total_size, labels.size())
-      << "Samples must have same size as labels";
+  PADDLE_ENFORCE_EQ(
+      total_size,
+      labels.size(),
+      phi::errors::InvalidArgument("Samples must have same size as labels"));
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
8 changes: 5 additions & 3 deletions paddle/cinn/auto_schedule/database/database.cc
@@ -22,7 +22,7 @@
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"
#include "paddle/cinn/ir/schedule/schedule_desc.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -42,8 +42,10 @@ proto::TuningRecord TuningRecord::ToProto() const {

Database::Database(int capacity_per_task)
: capacity_per_task_(capacity_per_task) {
-  CHECK_GT(capacity_per_task_, 0)
-      << "capacity_per_task_ should be greater than 0";
+  PADDLE_ENFORCE_GT(capacity_per_task_,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "capacity_per_task_ should be greater than 0"));
}

std::unique_ptr<Database> Database::Make(const DatabaseConfig& config) {
8 changes: 5 additions & 3 deletions paddle/cinn/auto_schedule/measure/simple_builder.cc
@@ -13,7 +13,7 @@
// limitations under the License.

#include "paddle/cinn/auto_schedule/measure/simple_builder.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -25,8 +25,10 @@ SimpleBuilder::SimpleBuilder(hlir::framework::GraphCompiler* graph_compiler)
: graph_compiler_(graph_compiler) {}

BuildResult SimpleBuilder::Build(const MeasureInput& input) {
-  CHECK_NE(graph_compiler_, static_cast<GraphCompiler*>(nullptr))
-      << "empty handle to GraphCompiler";
+  PADDLE_ENFORCE_NE(
+      graph_compiler_,
+      static_cast<GraphCompiler*>(nullptr),
+      phi::errors::InvalidArgument("empty handle to GraphCompiler"));
CompilationContext& context = graph_compiler_->GetCompilationContext();
context.groups.emplace_back(input.task->subgraph);
context.lowered_funcs.emplace_back(input.lowered_funcs);
21 changes: 16 additions & 5 deletions paddle/cinn/auto_schedule/measure/simple_runner.cc
@@ -25,7 +25,7 @@
#include "paddle/cinn/hlir/framework/buffer.h"
#include "paddle/cinn/hlir/framework/scope.h"
#include "paddle/cinn/hlir/framework/tensor.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -76,8 +76,11 @@ static void PopulateRandomValue(const cinn::common::Type& type,
std::generate_n(
fmt_ptr, numel, [&engine, &dist]() { return dist(engine); });
} else {
-    CHECK_EQ(type.bytes(), 8)
-        << "Unsupported type: " << type << ", type.bytes = " << type.bytes();
+    PADDLE_ENFORCE_EQ(
+        type.bytes(),
+        8,
+        phi::errors::Unimplemented("Unsupported type, the type.bytes is %d",
+                                   type.bytes()));
auto* fmt_ptr = reinterpret_cast<uint8_t*>(raw_ptr);
std::uniform_int_distribution<uint8_t> dist(
std::numeric_limits<uint8_t>::min(),
@@ -127,7 +130,12 @@ static std::unordered_set<std::string> ParamsNeedInitWithZero(
std::vector<int> param_idxs = kInitWithZeroParams.at(node->op()->name);
const auto& inlinks = node->inlinks_in_order();
for (int param_idx : param_idxs) {
-      CHECK_GT(inlinks.size(), param_idx);
+      PADDLE_ENFORCE_GT(inlinks.size(),
+                        param_idx,
+                        phi::errors::InvalidArgument(
+                            "The input size of the node is less than the "
+                            "index of the parameter that needs to be "
+                            "initialized to 0"));
auto& edge = inlinks.at(param_idx);
std::string param_name =
edge->source()->as<hlir::framework::NodeData>()->id();
Expand All @@ -141,7 +149,10 @@ static std::unordered_set<std::string> ParamsNeedInitWithZero(
}

SimpleRunner::SimpleRunner(int repeat_times) : repeat_times_(repeat_times) {
-  CHECK_GT(repeat_times_, 0) << "repeat_times can't less than 0";
+  PADDLE_ENFORCE_GT(
+      repeat_times_,
+      0,
+      phi::errors::InvalidArgument("repeat_times should be greater than 0"));
}

// Prepare execution arguments of all instructions to run, a argument
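The PopulateRandomValue fallback above fills 8-byte elements through a uint8_t pointer, one random byte at a time. A stand-alone sketch of that byte-wise fill under assumed buffer handling; the sketch draws int and casts, since the standard does not permit uint8_t as a uniform_int_distribution parameter:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <random>

// Fill a raw buffer with uniformly distributed random bytes.
void FillRandomBytes(void* raw_ptr, std::size_t num_bytes) {
  std::mt19937 engine{std::random_device{}()};
  std::uniform_int_distribution<int> dist(0, 255);
  auto* bytes = reinterpret_cast<std::uint8_t*>(raw_ptr);
  std::generate_n(bytes, num_bytes,
                  [&] { return static_cast<std::uint8_t>(dist(engine)); });
}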
Changes to an additional file (the file-name header was not captured):
@@ -18,7 +18,7 @@
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"
#include "paddle/cinn/ir/schedule/schedule_desc.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -29,7 +29,10 @@ int ExtractNumThreads(const ir::IRSchedule& ir_schedule,
if (step.type == "Bind" &&
step.attrs.find("thread_axis") != step.attrs.end() &&
absl::get<std::string>(step.attrs.at("thread_axis")) == bind_axis) {
CHECK_EQ(step.inputs.at("loop").size(), 1);
PADDLE_ENFORCE_EQ(step.inputs.at("loop").size(),
1,
phi::errors::InvalidArgument(
"The loop size of bind step should be 1"));
return step.inputs.at("loop")[0].As<ir::For>()->extent.as_int32();
}
}
40 changes: 29 additions & 11 deletions paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.cc
@@ -21,7 +21,7 @@
#include "paddle/cinn/ir/schedule_block_graph.h"
#include "paddle/cinn/ir/utils/ir_copy.h"
#include "paddle/cinn/ir/utils/ir_nodes_collector.h"

#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -40,8 +40,11 @@ bool IsSpatialLoop(const ir::For* for_node) {
const auto* schedule_block =
block_realize->schedule_block.As<ir::ScheduleBlock>();
CHECK(schedule_block) << "schedule_block field is not a ScheduleBlock";
-  CHECK_EQ(block_realize->iter_values.size(),
-           schedule_block->iter_vars.size());
+  PADDLE_ENFORCE_EQ(
+      block_realize->iter_values.size(),
+      schedule_block->iter_vars.size(),
+      phi::errors::InvalidArgument(
+          "The size of iter_values and iter_vars should be equal."));
for (int i = 0; i < block_realize->iter_values.size(); ++i) {
const ir::Var& iter_var = schedule_block->iter_vars[i];
const ir::Expr& binding = block_realize->iter_values[i];
@@ -93,10 +96,16 @@ void BindGPUIndex(ir::IRSchedule* ir_schedule,
int max_blocks,
int max_threads_per_block) {
auto all_loops = ir_schedule->GetLoops(block_name);
-  CHECK_LE(num_loops_to_bind, all_loops.size())
-      << "The number of loops to be bind is greater than size of all_loops";
-  CHECK_GE(num_loops_to_bind, 0)
-      << "The number of loops to be bind should be greater than 0";
+  PADDLE_ENFORCE_LE(
+      num_loops_to_bind,
+      all_loops.size(),
+      phi::errors::InvalidArgument(
+          "The number of loops to be bind is greater than size of all_loops"));
+  PADDLE_ENFORCE_GE(
+      num_loops_to_bind,
+      0,
+      phi::errors::InvalidArgument(
+          "The number of loops to be bind should be greater than 0"));
// check whether it is the case that threadIdx has been binded but blockIdx
// not, the threadIdx can only be binded in the first loop after
// num_loops_to_bind loops because we has excluded other cases in
@@ -130,13 +139,19 @@

if (extent <= max_blocks * max_threads_per_block) {
auto splits = ir_schedule->Split(fused_loop, {-1, max_threads_per_block});
-    CHECK_EQ(splits.size(), 2);
+    PADDLE_ENFORCE_EQ(
+        splits.size(),
+        2,
+        phi::errors::InvalidArgument("The size of splits should be 2."));
ir_schedule->Bind(splits[0], "blockIdx.x");
ir_schedule->Bind(splits[1], "threadIdx.x");
} else {
auto splits =
ir_schedule->Split(fused_loop, {-1, max_blocks, max_threads_per_block});
-    CHECK_EQ(splits.size(), 3);
+    PADDLE_ENFORCE_EQ(
+        splits.size(),
+        3,
+        phi::errors::InvalidArgument("The size of splits should be 3."));
ir_schedule->Reorder({splits[1], splits[2], splits[0]});
all_loops = ir_schedule->GetLoops(block_name);
ir_schedule->Bind(all_loops[0], "blockIdx.x");
Expand All @@ -160,8 +175,11 @@ RuleApplyType AutoBind::Init(ir::IRSchedule* ir_schedule) {
}

void AutoBind::Apply(int index) {
-  CHECK_LT(index, applicable_schedule_blocks_.size())
-      << "invalid apply index:" << index;
+  PADDLE_ENFORCE_LT(
+      index,
+      applicable_schedule_blocks_.size(),
+      phi::errors::InvalidArgument(
+          "The index should be less than size of applicable_schedule_blocks_"));
auto applied_block = applicable_schedule_blocks_.at(index);
auto all_loops = ir_schedule_->GetLoops(applied_block);
BindGPUIndex(ir_schedule_,
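To make the split arithmetic in BindGPUIndex concrete with assumed numbers: for a fused loop of extent 5000 and max_threads_per_block = 1024, the extent fits inside max_blocks * max_threads_per_block, so Split(fused_loop, {-1, 1024}) infers an outer extent of ceil(5000 / 1024) = 5 for blockIdx.x and leaves 1024 for threadIdx.x. A two-line check of that arithmetic:

#include <cstdio>

int main() {
  const int extent = 5000, max_threads_per_block = 1024;  // assumed values
  const int blocks =
      (extent + max_threads_per_block - 1) / max_threads_per_block;  // ceil division
  std::printf("blockIdx.x extent = %d, threadIdx.x extent = %d\n",
              blocks, max_threads_per_block);
  return 0;
}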