Commit 24c6d16

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into nested-namespace-part-12
walkalone20 committed Jun 10, 2024
2 parents d1663c9 + ed8168d
Showing 1,684 changed files with 30,044 additions and 20,987 deletions.
8 changes: 4 additions & 4 deletions .clang-format
@@ -6,11 +6,11 @@
# The basic usage is,
# clang-format -i -style=file PATH/TO/SOURCE/CODE
#
-# The -style=file implicit use ".clang-format" file located in one of
-# parent directory.
+# The -style=file implicit use ".clang-format" file located in one of
+# parent directory.
# The -i means inplace change.
#
-# The document of clang-format is
+# The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
@@ -20,7 +20,7 @@ IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -1 # The private/protected/public has no indent in class
-Standard: Cpp11
+Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false
BinPackArguments: false
1 change: 0 additions & 1 deletion .github/CODEOWNERS
@@ -53,7 +53,6 @@ python/paddle/base/compiler.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
python/paddle/base/dygraph/layers.py @JiabinYang @phlrain
python/paddle/base/framework.py @XiaoguangHu01 @zhiqiu @Xreki @qili93 @Aurelius84
python/paddle/base/__init__.py @phlrain @Aurelius84 @qili93
-python/paddle/base/parallel_executor.py @Xreki @zhhsplendid @Aurelius84
python/paddle/base/tests/unittests/white_list/check_op_sequence_batch_1_input_white_list.py @Aurelius84 @phlrain
python/paddle/base/tests/unittests/white_list/check_op_sequence_instance_0_input_white_list.py @Aurelius84 @phlrain
python/paddle/base/tests/unittests/white_list/check_shape_white_list.py @hong19860320 @Aurelius84 @phlrain
4 changes: 4 additions & 0 deletions .gitignore
@@ -106,3 +106,7 @@ paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp/*
paddle/fluid/pybind/static_op_function.*
paddle/fluid/pybind/ops_api.cc
python/paddle/tensor/tensor.pyi
+paddle/phi/kernels/fusion/cutlass/conv2d/build
+paddle/phi/kernels/fusion/cutlass/conv2d/cutlass
+paddle/phi/kernels/fusion/cutlass/gemm_epilogue/build
+paddle/phi/kernels/fusion/cutlass/gemm_epilogue/cutlass
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
@@ -20,7 +20,6 @@ repos:
- id: sort-simple-yaml
files: (ops|backward|op_[a-z_]+)\.yaml$
- id: trailing-whitespace
-      files: (.*\.(py|bzl|md|rst|c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps|cmake|yaml|yml|hook)|BUILD|.*\.BUILD|WORKSPACE|CMakeLists\.txt)$
-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
rev: v1.5.1
hooks:
@@ -55,7 +54,6 @@ repos:
rev: 23.3.0
hooks:
- id: black
-      files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
-   repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.5
hooks:
7 changes: 5 additions & 2 deletions CMakeLists.txt
@@ -99,6 +99,9 @@ if(WITH_GPU AND WITH_ROCM)
endif()

if(WITH_GPU AND NOT APPLE)
+  if(WITH_PIP_CUDA_LIBRARIES AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
+    add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES)
+  endif()
#(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL
"x86_64")
@@ -107,8 +110,8 @@ if(WITH_GPU AND NOT APPLE)
CACHE BOOL "" FORCE)
set(CMAKE_CUDA_FLAGS "--cudart shared")
if(WITH_PIP_CUDA_LIBRARIES)
-      #(Note risemeup1): Flag 'WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA.
-      add_definitions(-DWITH_PIP_CUDA_LIBRARIES)
+      #(Note risemeup1): Flag 'PADDLE_WITH_PIP_CUDA_LIBRARIES' will be used in dynamic_loader.cc to search for CUDA-related .so files through the Python libraries provided by NVIDIA.
+      add_definitions(-DPADDLE_WITH_PIP_CUDA_LIBRARIES)
endif()
endif()
enable_language(CUDA)
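For context on what this renamed flag does: as the in-hunk notes say, dynamic_loader.cc consults PADDLE_WITH_PIP_CUDA_LIBRARIES to search for CUDA .so files inside the Python packages that NVIDIA ships on pip. A minimal sketch of that compile-time gating pattern, with hypothetical function and path names rather than Paddle's actual code:

// Sketch only: a -DPADDLE_WITH_PIP_CUDA_LIBRARIES definition gating an extra
// search location for CUDA shared libraries. All names are illustrative.
#include <string>
#include <vector>

std::vector<std::string> CandidateCudaLibDirs() {
  std::vector<std::string> dirs = {"/usr/local/cuda/lib64"};  // system CUDA
#ifdef PADDLE_WITH_PIP_CUDA_LIBRARIES
  // With pip-provided CUDA wheels, also look in the site-packages layout
  // that NVIDIA's Python packages install their libraries into.
  dirs.push_back("nvidia/cuda_runtime/lib");
#endif
  return dirs;
}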
2 changes: 1 addition & 1 deletion cmake/PaddleConfig.cmake.in
@@ -12,7 +12,7 @@
get_filename_component(PADDLE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}/../.." ABSOLUTE)

# include directories
-set(PADDLE_INCLUDE_DIRS
+set(PADDLE_INCLUDE_DIRS
${PADDLE_INSTALL_PREFIX}/include
${PADDLE_INSTALL_PREFIX}/include/third_party
)
4 changes: 4 additions & 0 deletions cmake/cinn/external/absl.cmake
@@ -63,6 +63,10 @@ set(ABSL_LIB_NAMES
raw_hash_set)
set(ABSL_LIBS "")

+if(WITH_ROCM)
+  list(APPEND ABSL_LIB_NAMES strings_internal raw_logging_internal)
+endif()
+
add_library(absl STATIC IMPORTED GLOBAL)
set_property(TARGET absl PROPERTY IMPORTED_LOCATION
${ABSL_INSTALL_DIR}/lib/libabsl_base.a)
6 changes: 1 addition & 5 deletions cmake/hip.cmake
@@ -136,11 +136,7 @@ list(APPEND HIP_CXX_FLAGS -Wno-unused-local-typedef)
list(APPEND HIP_CXX_FLAGS -Wno-missing-braces)
list(APPEND HIP_CXX_FLAGS -Wno-sometimes-uninitialized)

-if(WITH_CINN)
-  list(APPEND HIP_CXX_FLAGS -std=c++14)
-else()
-  list(APPEND HIP_CXX_FLAGS -std=c++17)
-endif()
+list(APPEND HIP_CXX_FLAGS -std=c++17)
list(APPEND HIP_CXX_FLAGS --gpu-max-threads-per-block=1024)

if(CMAKE_BUILD_TYPE MATCHES Debug)
8 changes: 8 additions & 0 deletions cmake/inference_lib.cmake
@@ -285,6 +285,14 @@ else()
inference_lib_dist
SRCS ${paddle_phi_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
+    if(WITH_GPU OR WITH_ROCM)
+      set(paddle_phi_kernel_gpu_lib
+          ${PADDLE_BINARY_DIR}/paddle/phi/libphi_kernel_gpu.*)
+      copy(
+        inference_lib_dist
+        SRCS ${paddle_phi_kernel_gpu_lib}
+        DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
+    endif()
endif()
endif()

2 changes: 1 addition & 1 deletion cmake/make_resource.py
@@ -24,7 +24,7 @@
"const unsigned char "
+ var
+ "[] = {"
+ ",".join(["0x%02x" % ord(c) for c in open(res).read()])
+ ",".join([f"0x{ord(c):02x}" for c in open(res).read()])
+ ",0};\n"
+ "const unsigned "
+ var
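The expression above serializes a resource file into a C byte array; the change is behavior-preserving, since "0x%02x" % ord(c) and f"0x{ord(c):02x}" format identically (both render 'a' as 0x61). For a hypothetical three-byte input file "abc" embedded under the variable name kernel_src, the generated source would begin:

// Illustrative make_resource.py output; the variable name is hypothetical.
const unsigned char kernel_src[] = {0x61,0x62,0x63,0};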
4 changes: 2 additions & 2 deletions paddle/.set_python_path.sh
@@ -14,11 +14,11 @@
# limitations under the License.

#
-# A simple test driver for cmake.
+# A simple test driver for cmake.
# set PYTHONPATH before run command.
# Usage:
# ./.set_python_pash.sh -p YOUR_PYTHON_PATH {exec...}
-#
+#
# It same as PYTHONPATH=${YOUR_PYTHON_PATH}:$PYTHONPATH {exec...}
#
PYPATH=""
5 changes: 5 additions & 0 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -131,6 +131,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
} else {
iter_values.push_back(axis_vars[i]);
}
+    ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]});
}
VLOG(4) << "iter_value.size() and block_vars.size() is "
<< iter_values.size() << " " << block_vars.size();
@@ -167,6 +168,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
} else {
reduce_iter_values.push_back(axis_vars[i]);
}
+    ir::TryElevateInt32ToInt64({ir::Expr(axis_vars[i]), shape[i]});
}
VLOG(4) << "ast gen: reduce body is after replace 0" << reduce_body;
for (int i = 0; i < reduce_axis.size(); ++i) {
@@ -227,6 +229,9 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
ir::ScheduleBlock::Make(
reduce_block_vars, {}, {}, tensor->name, reduce_body));
for (int i = static_cast<int>(reduce_axis.size()) - 1; i >= 0; --i) {
+    ir::TryElevateInt32ToInt64({reduce_axis[i],
+                                reduce_axis[i]->lower_bound,
+                                reduce_axis[i]->upper_bound});
reduce_body = ir::For::Make(reduce_axis[i],
reduce_axis[i]->lower_bound,
reduce_axis[i]->upper_bound,
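The inserted ir::TryElevateInt32ToInt64 calls sit next to loop bounds and shape extents; the name suggests they promote a group of index expressions to int64 when any member needs more than 32 bits, so large shapes do not overflow int32 loop variables. A generic illustration of that idea, not CINN's implementation:

// Generic sketch of int32 -> int64 elevation: promote the whole group when
// any extent exceeds the int32 range. Types and names are illustrative.
#include <cstdint>
#include <limits>
#include <vector>

bool GroupNeedsInt64(const std::vector<int64_t>& extents) {
  for (int64_t extent : extents) {
    if (extent > std::numeric_limits<int32_t>::max()) return true;  // promote all
  }
  return false;
}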
18 changes: 13 additions & 5 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.cc
@@ -32,7 +32,7 @@
#include "paddle/cinn/lang/lower.h"
#include "paddle/cinn/optim/optimize.h"
#include "paddle/cinn/optim/transform_gpu_forloop.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -193,10 +193,14 @@ ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target,
std::unordered_set<std::string> GetReduceLoopVarNames(const ir::Expr block) {
const ir::ScheduleBlockRealize* block_realize =
block.As<ir::ScheduleBlockRealize>();
-  CHECK_NOTNULL(block_realize);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_realize,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize"));
const ir::ScheduleBlock* block_node =
block_realize->schedule_block.As<ir::ScheduleBlock>();
-  CHECK_NOTNULL(block_node);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_node,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlock"));
std::vector<ir::Expr> iter_values = block_realize->iter_values;
std::vector<ir::Var> iter_vars = block_node->iter_vars;

@@ -218,10 +222,14 @@ std::unordered_set<std::string> GetReduceLoopVarNames(const ir::Expr block) {
std::string GetBlockName(const ir::Expr block) {
const ir::ScheduleBlockRealize* block_realize =
block.As<ir::ScheduleBlockRealize>();
-  CHECK_NOTNULL(block_realize);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_realize,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlockRealize"));
const ir::ScheduleBlock* block_node =
block_realize->schedule_block.As<ir::ScheduleBlock>();
-  CHECK_NOTNULL(block_node);
+  PADDLE_ENFORCE_NOT_NULL(
+      block_node,
+      phi::errors::InvalidArgument("The block is not a ScheduleBlock"));
return block_node->name;
}

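A pattern worth noting, since it recurs across the files below: glog-style CHECK_* macros abort the process when the condition fails, while PADDLE_ENFORCE_* raises a catchable exception carrying a typed, formatted phi::errors message. A rough stand-in for the mechanism (not Paddle's actual macro, which also records file/line and builds a richer message):

// Simplified stand-in for the PADDLE_ENFORCE_NOT_NULL pattern shown above.
#include <stdexcept>
#include <string>

#define SKETCH_ENFORCE_NOT_NULL(ptr, msg)                                 \
  do {                                                                    \
    if ((ptr) == nullptr) {                                               \
      throw std::runtime_error(std::string("InvalidArgument: ") + (msg)); \
    }                                                                     \
  } while (0)

With this shape, a failed check surfaces as an error the caller can catch and report, instead of terminating the whole process.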
9 changes: 5 additions & 4 deletions paddle/cinn/auto_schedule/auto_tuner.cc
@@ -34,7 +34,7 @@
#include "paddle/cinn/hlir/framework/op.h"
#include "paddle/cinn/hlir/framework/visualize_helper.h"
#include "paddle/cinn/utils/string.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -144,9 +144,10 @@ void PrintResult(const TuningResult& result) {
}

TuningResult AutoTuner::Tune(const TuningOptions& options) {
-  CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config";
VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds
<< ", tasks size=" << tasks_.size();
+  PADDLE_ENFORCE_GT(options.num_tuning_rounds,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "The num_tuning_rounds should be greater than 0."));

TuningResult result;
result.subgraphs.resize(tasks_.size());
14 changes: 9 additions & 5 deletions paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc
@@ -24,7 +24,7 @@
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -45,8 +45,10 @@ void ExprCostModel::Train(const std::vector<const ir::ModuleExpr*>& samples,
const cinn::common::Target& target) {
trained_times_.store(1);
size_t total_size = samples.size();
-  CHECK_EQ(total_size, labels.size())
-      << "Samples must have same size as labels";
+  PADDLE_ENFORCE_EQ(
+      total_size,
+      labels.size(),
+      phi::errors::InvalidArgument("Samples must have same size as labels"));
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
@@ -63,8 +65,10 @@ void ExprCostModel::Update(const std::vector<const ir::ModuleExpr*>& samples,
const cinn::common::Target& target) {
++trained_times_;
size_t total_size = samples.size();
-  CHECK_EQ(total_size, labels.size())
-      << "Samples must have same size as labels";
+  PADDLE_ENFORCE_EQ(
+      total_size,
+      labels.size(),
+      phi::errors::InvalidArgument("Samples must have same size as labels"));
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
8 changes: 5 additions & 3 deletions paddle/cinn/auto_schedule/database/database.cc
@@ -22,7 +22,7 @@
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"
#include "paddle/cinn/ir/schedule/schedule_desc.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -42,8 +42,10 @@ proto::TuningRecord TuningRecord::ToProto() const {

Database::Database(int capacity_per_task)
: capacity_per_task_(capacity_per_task) {
-  CHECK_GT(capacity_per_task_, 0)
-      << "capacity_per_task_ should be greater than 0";
+  PADDLE_ENFORCE_GT(capacity_per_task_,
+                    0,
+                    phi::errors::InvalidArgument(
+                        "capacity_per_task_ should be greater than 0"));
}

std::unique_ptr<Database> Database::Make(const DatabaseConfig& config) {
8 changes: 5 additions & 3 deletions paddle/cinn/auto_schedule/measure/simple_builder.cc
@@ -13,7 +13,7 @@
// limitations under the License.

#include "paddle/cinn/auto_schedule/measure/simple_builder.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -25,8 +25,10 @@ SimpleBuilder::SimpleBuilder(hlir::framework::GraphCompiler* graph_compiler)
: graph_compiler_(graph_compiler) {}

BuildResult SimpleBuilder::Build(const MeasureInput& input) {
-  CHECK_NE(graph_compiler_, static_cast<GraphCompiler*>(nullptr))
-      << "empty handle to GraphCompiler";
+  PADDLE_ENFORCE_NE(
+      graph_compiler_,
+      static_cast<GraphCompiler*>(nullptr),
+      phi::errors::InvalidArgument("empty handle to GraphCompiler"));
CompilationContext& context = graph_compiler_->GetCompilationContext();
context.groups.emplace_back(input.task->subgraph);
context.lowered_funcs.emplace_back(input.lowered_funcs);
21 changes: 16 additions & 5 deletions paddle/cinn/auto_schedule/measure/simple_runner.cc
@@ -25,7 +25,7 @@
#include "paddle/cinn/hlir/framework/buffer.h"
#include "paddle/cinn/hlir/framework/scope.h"
#include "paddle/cinn/hlir/framework/tensor.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -76,8 +76,11 @@ static void PopulateRandomValue(const cinn::common::Type& type,
std::generate_n(
fmt_ptr, numel, [&engine, &dist]() { return dist(engine); });
} else {
-    CHECK_EQ(type.bytes(), 8)
-        << "Unsupported type: " << type << ", type.bytes = " << type.bytes();
+    PADDLE_ENFORCE_EQ(
+        type.bytes(),
+        8,
+        phi::errors::Unimplemented("Unsupported type, the type.bytes is %d",
+                                   type.bytes()));
auto* fmt_ptr = reinterpret_cast<uint8_t*>(raw_ptr);
std::uniform_int_distribution<uint8_t> dist(
std::numeric_limits<uint8_t>::min(),
@@ -127,7 +130,12 @@ static std::unordered_set<std::string> ParamsNeedInitWithZero(
std::vector<int> param_idxs = kInitWithZeroParams.at(node->op()->name);
const auto& inlinks = node->inlinks_in_order();
for (int param_idx : param_idxs) {
-      CHECK_GT(inlinks.size(), param_idx);
+      PADDLE_ENFORCE_GT(inlinks.size(),
+                        param_idx,
+                        phi::errors::InvalidArgument(
+                            "The input size of the node is less than the "
+                            "index of the parameter that needs to be "
+                            "initialized to 0"));
auto& edge = inlinks.at(param_idx);
std::string param_name =
edge->source()->as<hlir::framework::NodeData>()->id();
@@ -141,7 +149,10 @@
}

SimpleRunner::SimpleRunner(int repeat_times) : repeat_times_(repeat_times) {
-  CHECK_GT(repeat_times_, 0) << "repeat_times can't less than 0";
+  PADDLE_ENFORCE_GT(
+      repeat_times_,
+      0,
+      phi::errors::InvalidArgument("repeat_times should be greater than 0"));
}

// Prepare execution arguments of all instructions to run, a argument
@@ -18,7 +18,7 @@
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/schedule/ir_schedule.h"
#include "paddle/cinn/ir/schedule/schedule_desc.h"
-
+#include "paddle/common/enforce.h"
namespace cinn {
namespace auto_schedule {

@@ -29,7 +29,10 @@ int ExtractNumThreads(const ir::IRSchedule& ir_schedule,
if (step.type == "Bind" &&
step.attrs.find("thread_axis") != step.attrs.end() &&
absl::get<std::string>(step.attrs.at("thread_axis")) == bind_axis) {
CHECK_EQ(step.inputs.at("loop").size(), 1);
PADDLE_ENFORCE_EQ(step.inputs.at("loop").size(),
1,
phi::errors::InvalidArgument(
"The loop size of bind step should be 1"));
return step.inputs.at("loop")[0].As<ir::For>()->extent.as_int32();
}
}